Seth Kulick commited on
Commit
758da68
·
1 Parent(s): f86aa59
Files changed (2) hide show
  1. loss.tsv +50 -2
  2. training.log +784 -53
loss.tsv CHANGED
@@ -1,3 +1,51 @@
1
  EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
2
- 1 13:29:52 0.0000 1.7334235176429078 0.3509514629840851 0.9331 0.9331 0.9331 0.9331
3
- 2 13:38:31 0.0000 0.5678914814638107 0.23018118739128113 0.9562 0.9562 0.9562 0.9562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
2
+ 1 15:17:14 0.0000 4.21414778311098 1.6606154441833496 0.7017 0.7017 0.7017 0.7017
3
+ 2 15:24:56 0.0000 1.236230191219479 0.4555579721927643 0.9132 0.9132 0.9132 0.9132
4
+ 3 15:31:40 0.0000 0.649921288465607 0.247285857796669 0.9518 0.9518 0.9518 0.9518
5
+ 4 15:38:12 0.0000 0.49171242865394643 0.18006576597690582 0.9648 0.9648 0.9648 0.9648
6
+ 5 15:45:54 0.0000 0.42244408257393484 0.15854212641716003 0.9715 0.9715 0.9715 0.9715
7
+ 6 15:53:34 0.0000 0.38888915953073067 0.1478930115699768 0.9729 0.9729 0.9729 0.9729
8
+ 7 16:01:47 0.0000 0.36700320148296955 0.14240729808807373 0.9717 0.9717 0.9717 0.9717
9
+ 8 16:09:22 0.0000 0.34955964649357035 0.13401205837726593 0.9752 0.9752 0.9752 0.9752
10
+ 9 16:17:02 0.0000 0.3329301695936556 0.14190562069416046 0.9764 0.9764 0.9764 0.9764
11
+ 10 16:24:41 0.0000 0.3320703285608999 0.1481310874223709 0.9734 0.9734 0.9734 0.9734
12
+ 11 16:32:16 0.0000 0.3201111280509732 0.16022486984729767 0.9744 0.9744 0.9744 0.9744
13
+ 12 16:39:54 0.0000 0.3146267273126732 0.17038877308368683 0.9764 0.9764 0.9764 0.9764
14
+ 13 16:47:31 0.0000 0.3056304733563553 0.16180633008480072 0.9766 0.9766 0.9766 0.9766
15
+ 14 16:55:11 0.0000 0.2946385615949501 0.1961415857076645 0.9729 0.9729 0.9729 0.9729
16
+ 15 17:02:50 0.0000 0.29707305155274893 0.21415923535823822 0.9737 0.9737 0.9737 0.9737
17
+ 16 17:10:27 0.0000 0.28580348285942486 0.17488490045070648 0.9764 0.9764 0.9764 0.9764
18
+ 17 17:18:07 0.0000 0.28452900538217474 0.1961992233991623 0.9764 0.9764 0.9764 0.9764
19
+ 18 17:25:46 0.0000 0.286532418628909 0.18113288283348083 0.9781 0.9781 0.9781 0.9781
20
+ 19 17:33:26 0.0000 0.2808003542003455 0.2043328434228897 0.9793 0.9793 0.9793 0.9793
21
+ 20 17:41:09 0.0000 0.28836057751744903 0.17976026237010956 0.9798 0.9798 0.9798 0.9798
22
+ 21 17:48:53 0.0000 0.276102740426533 0.20532046258449554 0.9808 0.9808 0.9808 0.9808
23
+ 22 17:56:37 0.0000 0.27382087732996463 0.20975473523139954 0.9771 0.9771 0.9771 0.9771
24
+ 23 18:04:16 0.0000 0.27393156456791734 0.21456189453601837 0.9796 0.9796 0.9796 0.9796
25
+ 24 18:12:01 0.0000 0.2696315985537938 0.21408958733081818 0.9788 0.9788 0.9788 0.9788
26
+ 25 18:19:51 0.0000 0.2604978712176271 0.20778048038482666 0.9801 0.9801 0.9801 0.9801
27
+ 26 18:27:24 0.0000 0.2631420220409018 0.22401468455791473 0.9786 0.9786 0.9786 0.9786
28
+ 27 18:35:00 0.0000 0.2655839982462426 0.2324327975511551 0.9771 0.9771 0.9771 0.9771
29
+ 28 18:42:35 0.0000 0.2628011544988305 0.21640333533287048 0.9803 0.9803 0.9803 0.9803
30
+ 29 18:50:09 0.0000 0.26550006090015277 0.24248327314853668 0.9796 0.9796 0.9796 0.9796
31
+ 30 18:57:46 0.0000 0.2652689226998264 0.23393450677394867 0.9776 0.9776 0.9776 0.9776
32
+ 31 19:05:25 0.0000 0.25939785844109664 0.22774212062358856 0.9788 0.9788 0.9788 0.9788
33
+ 32 19:13:25 0.0000 0.2645543534505578 0.22920973598957062 0.9793 0.9793 0.9793 0.9793
34
+ 33 19:21:02 0.0000 0.255237703400159 0.23627179861068726 0.9791 0.9791 0.9791 0.9791
35
+ 34 19:28:40 0.0000 0.2595120500430324 0.23955273628234863 0.9796 0.9796 0.9796 0.9796
36
+ 35 19:36:19 0.0000 0.2611494515697348 0.24450713396072388 0.9791 0.9791 0.9791 0.9791
37
+ 36 19:43:57 0.0000 0.25360077463430586 0.2530966103076935 0.9788 0.9788 0.9788 0.9788
38
+ 37 19:51:34 0.0000 0.26495934852662506 0.2624962031841278 0.9781 0.9781 0.9781 0.9781
39
+ 38 19:59:11 0.0000 0.25416150340144184 0.24244999885559082 0.9788 0.9788 0.9788 0.9788
40
+ 39 20:06:48 0.0000 0.257929350459372 0.24615894258022308 0.9798 0.9798 0.9798 0.9798
41
+ 40 20:14:39 0.0000 0.25402286565305776 0.2575598359107971 0.9791 0.9791 0.9791 0.9791
42
+ 41 20:22:19 0.0000 0.2508873656720227 0.25127604603767395 0.9786 0.9786 0.9786 0.9786
43
+ 42 20:29:58 0.0000 0.2584042182083517 0.2509002983570099 0.9776 0.9776 0.9776 0.9776
44
+ 43 20:37:36 0.0000 0.2535730162199338 0.25337928533554077 0.9784 0.9784 0.9784 0.9784
45
+ 44 20:45:14 0.0000 0.24615347105615198 0.25915977358818054 0.9784 0.9784 0.9784 0.9784
46
+ 45 20:52:52 0.0000 0.2502548443814474 0.2550533413887024 0.9788 0.9788 0.9788 0.9788
47
+ 46 21:00:31 0.0000 0.24752661908553505 0.2502936124801636 0.9796 0.9796 0.9796 0.9796
48
+ 47 21:08:08 0.0000 0.25003396999949856 0.25260353088378906 0.9788 0.9788 0.9788 0.9788
49
+ 48 21:15:46 0.0000 0.2562181207417887 0.2517630159854889 0.9793 0.9793 0.9793 0.9793
50
+ 49 21:23:25 0.0000 0.2479874323703076 0.25181668996810913 0.9786 0.9786 0.9786 0.9786
51
+ 50 21:31:03 0.0000 0.2503007775652108 0.2513697147369385 0.9784 0.9784 0.9784 0.9784
training.log CHANGED
@@ -1,5 +1,5 @@
1
- 2023-08-17 13:20:08,970 ----------------------------------------------------------------------------------------------------
2
- 2023-08-17 13:20:08,977 Model: "SequenceTagger(
3
  (embeddings): TransformerWordEmbeddings(
4
  (model): XLMRobertaModel(
5
  (embeddings): XLMRobertaEmbeddings(
@@ -313,54 +313,785 @@
313
  (loss_function): ViterbiLoss()
314
  (crf): CRF()
315
  )"
316
- 2023-08-17 13:20:08,995 ----------------------------------------------------------------------------------------------------
317
- 2023-08-17 13:20:08,996 Corpus: "Corpus: 7767 train + 409 dev + 0 test sentences"
318
- 2023-08-17 13:20:08,997 ----------------------------------------------------------------------------------------------------
319
- 2023-08-17 13:20:08,997 Parameters:
320
- 2023-08-17 13:20:08,997 - learning_rate: "0.000050"
321
- 2023-08-17 13:20:08,998 - mini_batch_size: "32"
322
- 2023-08-17 13:20:08,998 - patience: "3"
323
- 2023-08-17 13:20:08,998 - anneal_factor: "0.5"
324
- 2023-08-17 13:20:08,999 - max_epochs: "2"
325
- 2023-08-17 13:20:08,999 - shuffle: "True"
326
- 2023-08-17 13:20:09,000 - train_with_dev: "False"
327
- 2023-08-17 13:20:09,000 - batch_growth_annealing: "False"
328
- 2023-08-17 13:20:09,000 ----------------------------------------------------------------------------------------------------
329
- 2023-08-17 13:20:09,001 Model training base path: "/scratch/skulick/ppchy-11-pos/xlmb-ck05-yid1/split_final/train"
330
- 2023-08-17 13:20:09,001 ----------------------------------------------------------------------------------------------------
331
- 2023-08-17 13:20:09,001 Device: cuda:0
332
- 2023-08-17 13:20:09,002 ----------------------------------------------------------------------------------------------------
333
- 2023-08-17 13:20:09,002 Embeddings storage mode: none
334
- 2023-08-17 13:20:09,002 ----------------------------------------------------------------------------------------------------
335
- 2023-08-17 13:21:05,834 epoch 1 - iter 24/243 - loss 5.52841502 - time (sec): 56.83 - samples/sec: 131.44 - lr: 0.000025
336
- 2023-08-17 13:22:03,318 epoch 1 - iter 48/243 - loss 4.70686211 - time (sec): 114.32 - samples/sec: 130.45 - lr: 0.000050
337
- 2023-08-17 13:23:00,549 epoch 1 - iter 72/243 - loss 3.86110162 - time (sec): 171.55 - samples/sec: 131.94 - lr: 0.000047
338
- 2023-08-17 13:23:57,695 epoch 1 - iter 96/243 - loss 3.22106003 - time (sec): 228.69 - samples/sec: 132.37 - lr: 0.000045
339
- 2023-08-17 13:24:55,039 epoch 1 - iter 120/243 - loss 2.77518007 - time (sec): 286.04 - samples/sec: 132.92 - lr: 0.000042
340
- 2023-08-17 13:25:52,345 epoch 1 - iter 144/243 - loss 2.46009763 - time (sec): 343.34 - samples/sec: 133.06 - lr: 0.000039
341
- 2023-08-17 13:26:49,831 epoch 1 - iter 168/243 - loss 2.21288400 - time (sec): 400.83 - samples/sec: 134.04 - lr: 0.000036
342
- 2023-08-17 13:27:47,964 epoch 1 - iter 192/243 - loss 2.01670410 - time (sec): 458.96 - samples/sec: 134.63 - lr: 0.000034
343
- 2023-08-17 13:28:45,494 epoch 1 - iter 216/243 - loss 1.86783335 - time (sec): 516.49 - samples/sec: 134.47 - lr: 0.000031
344
- 2023-08-17 13:29:43,119 epoch 1 - iter 240/243 - loss 1.74523925 - time (sec): 574.12 - samples/sec: 135.25 - lr: 0.000028
345
- 2023-08-17 13:29:50,011 ----------------------------------------------------------------------------------------------------
346
- 2023-08-17 13:29:50,011 EPOCH 1 done: loss 1.7334 - lr 0.000028
347
- 2023-08-17 13:29:52,277 Evaluating as a multi-label problem: False
348
- 2023-08-17 13:29:52,376 DEV : loss 0.3509514629840851 - f1-score (micro avg) 0.9331
349
- 2023-08-17 13:29:52,410 saving best model
350
- 2023-08-17 13:29:54,774 ----------------------------------------------------------------------------------------------------
351
- 2023-08-17 13:30:44,972 epoch 2 - iter 24/243 - loss 0.58877620 - time (sec): 50.20 - samples/sec: 152.66 - lr: 0.000025
352
- 2023-08-17 13:31:36,455 epoch 2 - iter 48/243 - loss 0.60804646 - time (sec): 101.68 - samples/sec: 152.75 - lr: 0.000022
353
- 2023-08-17 13:32:27,132 epoch 2 - iter 72/243 - loss 0.60136722 - time (sec): 152.36 - samples/sec: 153.64 - lr: 0.000020
354
- 2023-08-17 13:33:17,902 epoch 2 - iter 96/243 - loss 0.59255541 - time (sec): 203.13 - samples/sec: 154.55 - lr: 0.000017
355
- 2023-08-17 13:34:08,949 epoch 2 - iter 120/243 - loss 0.58957421 - time (sec): 254.17 - samples/sec: 154.79 - lr: 0.000014
356
- 2023-08-17 13:35:00,256 epoch 2 - iter 144/243 - loss 0.58878210 - time (sec): 305.48 - samples/sec: 154.48 - lr: 0.000011
357
- 2023-08-17 13:35:51,214 epoch 2 - iter 168/243 - loss 0.58168957 - time (sec): 356.44 - samples/sec: 153.84 - lr: 0.000009
358
- 2023-08-17 13:36:42,167 epoch 2 - iter 192/243 - loss 0.57403444 - time (sec): 407.39 - samples/sec: 153.55 - lr: 0.000006
359
- 2023-08-17 13:37:32,761 epoch 2 - iter 216/243 - loss 0.57331317 - time (sec): 457.99 - samples/sec: 152.68 - lr: 0.000003
360
- 2023-08-17 13:38:23,745 epoch 2 - iter 240/243 - loss 0.56849021 - time (sec): 508.97 - samples/sec: 152.71 - lr: 0.000000
361
- 2023-08-17 13:38:29,500 ----------------------------------------------------------------------------------------------------
362
- 2023-08-17 13:38:29,500 EPOCH 2 done: loss 0.5679 - lr 0.000000
363
- 2023-08-17 13:38:31,769 Evaluating as a multi-label problem: False
364
- 2023-08-17 13:38:31,868 DEV : loss 0.23018118739128113 - f1-score (micro avg) 0.9562
365
- 2023-08-17 13:38:31,902 saving best model
366
- 2023-08-17 13:38:37,560 Test data not provided setting final score to 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-08-17 15:09:23,831 ----------------------------------------------------------------------------------------------------
2
+ 2023-08-17 15:09:23,833 Model: "SequenceTagger(
3
  (embeddings): TransformerWordEmbeddings(
4
  (model): XLMRobertaModel(
5
  (embeddings): XLMRobertaEmbeddings(
 
313
  (loss_function): ViterbiLoss()
314
  (crf): CRF()
315
  )"
316
+ 2023-08-17 15:09:23,833 ----------------------------------------------------------------------------------------------------
317
+ 2023-08-17 15:09:23,833 Corpus: "Corpus: 7767 train + 409 dev + 0 test sentences"
318
+ 2023-08-17 15:09:23,833 ----------------------------------------------------------------------------------------------------
319
+ 2023-08-17 15:09:23,833 Parameters:
320
+ 2023-08-17 15:09:23,833 - learning_rate: "0.000050"
321
+ 2023-08-17 15:09:23,833 - mini_batch_size: "32"
322
+ 2023-08-17 15:09:23,833 - patience: "3"
323
+ 2023-08-17 15:09:23,834 - anneal_factor: "0.5"
324
+ 2023-08-17 15:09:23,834 - max_epochs: "50"
325
+ 2023-08-17 15:09:23,834 - shuffle: "True"
326
+ 2023-08-17 15:09:23,834 - train_with_dev: "False"
327
+ 2023-08-17 15:09:23,834 - batch_growth_annealing: "False"
328
+ 2023-08-17 15:09:23,834 ----------------------------------------------------------------------------------------------------
329
+ 2023-08-17 15:09:23,834 Model training base path: "/scratch/skulick/ppchy-11-pos/xlmb-ck05-yid1/split_final/train"
330
+ 2023-08-17 15:09:23,834 ----------------------------------------------------------------------------------------------------
331
+ 2023-08-17 15:09:23,834 Device: cuda:0
332
+ 2023-08-17 15:09:23,834 ----------------------------------------------------------------------------------------------------
333
+ 2023-08-17 15:09:23,834 Embeddings storage mode: none
334
+ 2023-08-17 15:09:23,834 ----------------------------------------------------------------------------------------------------
335
+ 2023-08-17 15:10:10,017 epoch 1 - iter 24/243 - loss 5.78182875 - time (sec): 46.18 - samples/sec: 161.75 - lr: 0.000001
336
+ 2023-08-17 15:10:55,426 epoch 1 - iter 48/243 - loss 5.72562134 - time (sec): 91.59 - samples/sec: 162.81 - lr: 0.000002
337
+ 2023-08-17 15:11:40,686 epoch 1 - iter 72/243 - loss 5.61608578 - time (sec): 136.85 - samples/sec: 165.39 - lr: 0.000003
338
+ 2023-08-17 15:12:25,840 epoch 1 - iter 96/243 - loss 5.47788448 - time (sec): 182.01 - samples/sec: 166.33 - lr: 0.000004
339
+ 2023-08-17 15:13:11,314 epoch 1 - iter 120/243 - loss 5.26991238 - time (sec): 227.48 - samples/sec: 167.13 - lr: 0.000005
340
+ 2023-08-17 15:13:56,587 epoch 1 - iter 144/243 - loss 5.07404788 - time (sec): 272.75 - samples/sec: 167.50 - lr: 0.000006
341
+ 2023-08-17 15:14:42,092 epoch 1 - iter 168/243 - loss 4.86972776 - time (sec): 318.26 - samples/sec: 168.81 - lr: 0.000007
342
+ 2023-08-17 15:15:28,715 epoch 1 - iter 192/243 - loss 4.66109804 - time (sec): 364.88 - samples/sec: 169.34 - lr: 0.000008
343
+ 2023-08-17 15:16:17,849 epoch 1 - iter 216/243 - loss 4.44788101 - time (sec): 414.01 - samples/sec: 167.75 - lr: 0.000009
344
+ 2023-08-17 15:17:07,259 epoch 1 - iter 240/243 - loss 4.23693631 - time (sec): 463.43 - samples/sec: 167.55 - lr: 0.000010
345
+ 2023-08-17 15:17:12,895 ----------------------------------------------------------------------------------------------------
346
+ 2023-08-17 15:17:12,896 EPOCH 1 done: loss 4.2141 - lr 0.000010
347
+ 2023-08-17 15:17:14,671 Evaluating as a multi-label problem: False
348
+ 2023-08-17 15:17:14,718 DEV : loss 1.6606154441833496 - f1-score (micro avg) 0.7017
349
+ 2023-08-17 15:17:14,730 saving best model
350
+ 2023-08-17 15:17:17,173 ----------------------------------------------------------------------------------------------------
351
+ 2023-08-17 15:18:02,076 epoch 2 - iter 24/243 - loss 1.93643008 - time (sec): 44.90 - samples/sec: 170.66 - lr: 0.000011
352
+ 2023-08-17 15:18:47,637 epoch 2 - iter 48/243 - loss 1.80170810 - time (sec): 90.46 - samples/sec: 171.69 - lr: 0.000012
353
+ 2023-08-17 15:19:32,933 epoch 2 - iter 72/243 - loss 1.68553020 - time (sec): 135.76 - samples/sec: 172.42 - lr: 0.000013
354
+ 2023-08-17 15:20:18,356 epoch 2 - iter 96/243 - loss 1.59018149 - time (sec): 181.18 - samples/sec: 173.27 - lr: 0.000014
355
+ 2023-08-17 15:21:02,921 epoch 2 - iter 120/243 - loss 1.51168641 - time (sec): 225.75 - samples/sec: 174.28 - lr: 0.000015
356
+ 2023-08-17 15:21:49,033 epoch 2 - iter 144/243 - loss 1.44496232 - time (sec): 271.86 - samples/sec: 173.59 - lr: 0.000016
357
+ 2023-08-17 15:22:34,169 epoch 2 - iter 168/243 - loss 1.38343183 - time (sec): 316.99 - samples/sec: 172.99 - lr: 0.000017
358
+ 2023-08-17 15:23:19,548 epoch 2 - iter 192/243 - loss 1.32848150 - time (sec): 362.37 - samples/sec: 172.63 - lr: 0.000018
359
+ 2023-08-17 15:24:04,573 epoch 2 - iter 216/243 - loss 1.28678633 - time (sec): 407.40 - samples/sec: 171.64 - lr: 0.000019
360
+ 2023-08-17 15:24:49,925 epoch 2 - iter 240/243 - loss 1.24063251 - time (sec): 452.75 - samples/sec: 171.67 - lr: 0.000020
361
+ 2023-08-17 15:24:55,070 ----------------------------------------------------------------------------------------------------
362
+ 2023-08-17 15:24:55,070 EPOCH 2 done: loss 1.2362 - lr 0.000020
363
+ 2023-08-17 15:24:56,786 Evaluating as a multi-label problem: False
364
+ 2023-08-17 15:24:56,828 DEV : loss 0.4555579721927643 - f1-score (micro avg) 0.9132
365
+ 2023-08-17 15:24:56,838 saving best model
366
+ 2023-08-17 15:25:00,108 ----------------------------------------------------------------------------------------------------
367
+ 2023-08-17 15:25:38,120 epoch 3 - iter 24/243 - loss 0.80478615 - time (sec): 38.01 - samples/sec: 213.04 - lr: 0.000021
368
+ 2023-08-17 15:26:16,111 epoch 3 - iter 48/243 - loss 0.76412570 - time (sec): 76.00 - samples/sec: 209.83 - lr: 0.000022
369
+ 2023-08-17 15:26:53,828 epoch 3 - iter 72/243 - loss 0.74620943 - time (sec): 113.72 - samples/sec: 207.71 - lr: 0.000023
370
+ 2023-08-17 15:27:31,562 epoch 3 - iter 96/243 - loss 0.72917808 - time (sec): 151.45 - samples/sec: 206.81 - lr: 0.000024
371
+ 2023-08-17 15:28:09,535 epoch 3 - iter 120/243 - loss 0.72089137 - time (sec): 189.43 - samples/sec: 207.05 - lr: 0.000025
372
+ 2023-08-17 15:28:47,278 epoch 3 - iter 144/243 - loss 0.70075087 - time (sec): 227.17 - samples/sec: 206.59 - lr: 0.000026
373
+ 2023-08-17 15:29:24,943 epoch 3 - iter 168/243 - loss 0.68433087 - time (sec): 264.83 - samples/sec: 205.80 - lr: 0.000027
374
+ 2023-08-17 15:30:02,956 epoch 3 - iter 192/243 - loss 0.67039041 - time (sec): 302.85 - samples/sec: 205.90 - lr: 0.000028
375
+ 2023-08-17 15:30:48,317 epoch 3 - iter 216/243 - loss 0.66061953 - time (sec): 348.21 - samples/sec: 201.37 - lr: 0.000029
376
+ 2023-08-17 15:31:33,554 epoch 3 - iter 240/243 - loss 0.65094446 - time (sec): 393.45 - samples/sec: 197.60 - lr: 0.000030
377
+ 2023-08-17 15:31:38,749 ----------------------------------------------------------------------------------------------------
378
+ 2023-08-17 15:31:38,749 EPOCH 3 done: loss 0.6499 - lr 0.000030
379
+ 2023-08-17 15:31:40,552 Evaluating as a multi-label problem: False
380
+ 2023-08-17 15:31:40,599 DEV : loss 0.247285857796669 - f1-score (micro avg) 0.9518
381
+ 2023-08-17 15:31:40,610 saving best model
382
+ 2023-08-17 15:31:44,059 ----------------------------------------------------------------------------------------------------
383
+ 2023-08-17 15:32:21,941 epoch 4 - iter 24/243 - loss 0.55472967 - time (sec): 37.88 - samples/sec: 206.88 - lr: 0.000031
384
+ 2023-08-17 15:32:59,740 epoch 4 - iter 48/243 - loss 0.52360637 - time (sec): 75.68 - samples/sec: 206.04 - lr: 0.000032
385
+ 2023-08-17 15:33:37,568 epoch 4 - iter 72/243 - loss 0.51655667 - time (sec): 113.51 - samples/sec: 205.40 - lr: 0.000033
386
+ 2023-08-17 15:34:15,494 epoch 4 - iter 96/243 - loss 0.51891961 - time (sec): 151.43 - samples/sec: 206.17 - lr: 0.000034
387
+ 2023-08-17 15:34:53,455 epoch 4 - iter 120/243 - loss 0.50631556 - time (sec): 189.40 - samples/sec: 207.02 - lr: 0.000035
388
+ 2023-08-17 15:35:31,228 epoch 4 - iter 144/243 - loss 0.50459545 - time (sec): 227.17 - samples/sec: 206.28 - lr: 0.000036
389
+ 2023-08-17 15:36:09,104 epoch 4 - iter 168/243 - loss 0.50045519 - time (sec): 265.04 - samples/sec: 206.34 - lr: 0.000037
390
+ 2023-08-17 15:36:46,998 epoch 4 - iter 192/243 - loss 0.49446570 - time (sec): 302.94 - samples/sec: 206.26 - lr: 0.000038
391
+ 2023-08-17 15:37:24,825 epoch 4 - iter 216/243 - loss 0.49218271 - time (sec): 340.77 - samples/sec: 206.17 - lr: 0.000039
392
+ 2023-08-17 15:38:04,670 epoch 4 - iter 240/243 - loss 0.49159525 - time (sec): 380.61 - samples/sec: 204.16 - lr: 0.000040
393
+ 2023-08-17 15:38:10,286 ----------------------------------------------------------------------------------------------------
394
+ 2023-08-17 15:38:10,286 EPOCH 4 done: loss 0.4917 - lr 0.000040
395
+ 2023-08-17 15:38:12,046 Evaluating as a multi-label problem: False
396
+ 2023-08-17 15:38:12,088 DEV : loss 0.18006576597690582 - f1-score (micro avg) 0.9648
397
+ 2023-08-17 15:38:12,098 saving best model
398
+ 2023-08-17 15:38:15,361 ----------------------------------------------------------------------------------------------------
399
+ 2023-08-17 15:39:00,518 epoch 5 - iter 24/243 - loss 0.45058356 - time (sec): 45.16 - samples/sec: 172.33 - lr: 0.000041
400
+ 2023-08-17 15:39:45,761 epoch 5 - iter 48/243 - loss 0.43329992 - time (sec): 90.40 - samples/sec: 172.17 - lr: 0.000042
401
+ 2023-08-17 15:40:31,171 epoch 5 - iter 72/243 - loss 0.43373609 - time (sec): 135.81 - samples/sec: 173.01 - lr: 0.000043
402
+ 2023-08-17 15:41:16,417 epoch 5 - iter 96/243 - loss 0.43090189 - time (sec): 181.06 - samples/sec: 172.32 - lr: 0.000044
403
+ 2023-08-17 15:42:01,492 epoch 5 - iter 120/243 - loss 0.42730629 - time (sec): 226.13 - samples/sec: 171.36 - lr: 0.000045
404
+ 2023-08-17 15:42:46,685 epoch 5 - iter 144/243 - loss 0.42510607 - time (sec): 271.32 - samples/sec: 171.77 - lr: 0.000046
405
+ 2023-08-17 15:43:31,959 epoch 5 - iter 168/243 - loss 0.42354677 - time (sec): 316.60 - samples/sec: 172.04 - lr: 0.000047
406
+ 2023-08-17 15:44:17,298 epoch 5 - iter 192/243 - loss 0.42562343 - time (sec): 361.94 - samples/sec: 172.17 - lr: 0.000048
407
+ 2023-08-17 15:45:02,525 epoch 5 - iter 216/243 - loss 0.42329549 - time (sec): 407.16 - samples/sec: 172.19 - lr: 0.000049
408
+ 2023-08-17 15:45:47,605 epoch 5 - iter 240/243 - loss 0.42313631 - time (sec): 452.24 - samples/sec: 171.94 - lr: 0.000050
409
+ 2023-08-17 15:45:52,793 ----------------------------------------------------------------------------------------------------
410
+ 2023-08-17 15:45:52,793 EPOCH 5 done: loss 0.4224 - lr 0.000050
411
+ 2023-08-17 15:45:54,570 Evaluating as a multi-label problem: False
412
+ 2023-08-17 15:45:54,612 DEV : loss 0.15854212641716003 - f1-score (micro avg) 0.9715
413
+ 2023-08-17 15:45:54,622 saving best model
414
+ 2023-08-17 15:45:57,885 ----------------------------------------------------------------------------------------------------
415
+ 2023-08-17 15:46:42,689 epoch 6 - iter 24/243 - loss 0.38322411 - time (sec): 44.80 - samples/sec: 169.56 - lr: 0.000050
416
+ 2023-08-17 15:47:27,836 epoch 6 - iter 48/243 - loss 0.38879490 - time (sec): 89.95 - samples/sec: 173.05 - lr: 0.000050
417
+ 2023-08-17 15:48:12,824 epoch 6 - iter 72/243 - loss 0.39501775 - time (sec): 134.94 - samples/sec: 172.82 - lr: 0.000050
418
+ 2023-08-17 15:48:57,713 epoch 6 - iter 96/243 - loss 0.39125526 - time (sec): 179.83 - samples/sec: 171.97 - lr: 0.000050
419
+ 2023-08-17 15:49:42,741 epoch 6 - iter 120/243 - loss 0.38810381 - time (sec): 224.86 - samples/sec: 172.67 - lr: 0.000049
420
+ 2023-08-17 15:50:27,712 epoch 6 - iter 144/243 - loss 0.38859919 - time (sec): 269.83 - samples/sec: 172.46 - lr: 0.000049
421
+ 2023-08-17 15:51:12,538 epoch 6 - iter 168/243 - loss 0.39183603 - time (sec): 314.65 - samples/sec: 171.82 - lr: 0.000049
422
+ 2023-08-17 15:51:57,735 epoch 6 - iter 192/243 - loss 0.39172498 - time (sec): 359.85 - samples/sec: 172.33 - lr: 0.000049
423
+ 2023-08-17 15:52:42,844 epoch 6 - iter 216/243 - loss 0.38755663 - time (sec): 404.96 - samples/sec: 172.78 - lr: 0.000049
424
+ 2023-08-17 15:53:27,836 epoch 6 - iter 240/243 - loss 0.38859503 - time (sec): 449.95 - samples/sec: 172.71 - lr: 0.000049
425
+ 2023-08-17 15:53:32,982 ----------------------------------------------------------------------------------------------------
426
+ 2023-08-17 15:53:32,982 EPOCH 6 done: loss 0.3889 - lr 0.000049
427
+ 2023-08-17 15:53:34,689 Evaluating as a multi-label problem: False
428
+ 2023-08-17 15:53:34,730 DEV : loss 0.1478930115699768 - f1-score (micro avg) 0.9729
429
+ 2023-08-17 15:53:34,740 saving best model
430
+ 2023-08-17 15:53:38,076 ----------------------------------------------------------------------------------------------------
431
+ 2023-08-17 15:54:22,729 epoch 7 - iter 24/243 - loss 0.37119833 - time (sec): 44.65 - samples/sec: 170.23 - lr: 0.000049
432
+ 2023-08-17 15:55:10,411 epoch 7 - iter 48/243 - loss 0.34925497 - time (sec): 92.33 - samples/sec: 165.86 - lr: 0.000049
433
+ 2023-08-17 15:55:59,055 epoch 7 - iter 72/243 - loss 0.36339135 - time (sec): 140.98 - samples/sec: 162.90 - lr: 0.000049
434
+ 2023-08-17 15:56:47,898 epoch 7 - iter 96/243 - loss 0.36053250 - time (sec): 189.82 - samples/sec: 162.96 - lr: 0.000048
435
+ 2023-08-17 15:57:36,721 epoch 7 - iter 120/243 - loss 0.36487615 - time (sec): 238.64 - samples/sec: 163.08 - lr: 0.000048
436
+ 2023-08-17 15:58:25,449 epoch 7 - iter 144/243 - loss 0.36319947 - time (sec): 287.37 - samples/sec: 162.66 - lr: 0.000048
437
+ 2023-08-17 15:59:14,209 epoch 7 - iter 168/243 - loss 0.36321272 - time (sec): 336.13 - samples/sec: 162.36 - lr: 0.000048
438
+ 2023-08-17 16:00:02,929 epoch 7 - iter 192/243 - loss 0.36447693 - time (sec): 384.85 - samples/sec: 161.79 - lr: 0.000048
439
+ 2023-08-17 16:00:51,616 epoch 7 - iter 216/243 - loss 0.36744951 - time (sec): 433.54 - samples/sec: 161.49 - lr: 0.000048
440
+ 2023-08-17 16:01:40,286 epoch 7 - iter 240/243 - loss 0.36634157 - time (sec): 482.21 - samples/sec: 161.26 - lr: 0.000048
441
+ 2023-08-17 16:01:45,814 ----------------------------------------------------------------------------------------------------
442
+ 2023-08-17 16:01:45,814 EPOCH 7 done: loss 0.3670 - lr 0.000048
443
+ 2023-08-17 16:01:47,570 Evaluating as a multi-label problem: False
444
+ 2023-08-17 16:01:47,611 DEV : loss 0.14240729808807373 - f1-score (micro avg) 0.9717
445
+ 2023-08-17 16:01:47,621 ----------------------------------------------------------------------------------------------------
446
+ 2023-08-17 16:02:32,343 epoch 8 - iter 24/243 - loss 0.35991738 - time (sec): 44.72 - samples/sec: 171.01 - lr: 0.000048
447
+ 2023-08-17 16:03:17,025 epoch 8 - iter 48/243 - loss 0.34897131 - time (sec): 89.40 - samples/sec: 170.46 - lr: 0.000048
448
+ 2023-08-17 16:04:01,760 epoch 8 - iter 72/243 - loss 0.34258107 - time (sec): 134.14 - samples/sec: 171.01 - lr: 0.000047
449
+ 2023-08-17 16:04:46,387 epoch 8 - iter 96/243 - loss 0.34457191 - time (sec): 178.77 - samples/sec: 170.25 - lr: 0.000047
450
+ 2023-08-17 16:05:31,060 epoch 8 - iter 120/243 - loss 0.34507195 - time (sec): 223.44 - samples/sec: 170.22 - lr: 0.000047
451
+ 2023-08-17 16:06:16,120 epoch 8 - iter 144/243 - loss 0.34828898 - time (sec): 268.50 - samples/sec: 172.24 - lr: 0.000047
452
+ 2023-08-17 16:07:00,907 epoch 8 - iter 168/243 - loss 0.34938445 - time (sec): 313.29 - samples/sec: 172.01 - lr: 0.000047
453
+ 2023-08-17 16:07:45,925 epoch 8 - iter 192/243 - loss 0.34862273 - time (sec): 358.30 - samples/sec: 172.84 - lr: 0.000047
454
+ 2023-08-17 16:08:30,431 epoch 8 - iter 216/243 - loss 0.34977990 - time (sec): 402.81 - samples/sec: 173.03 - lr: 0.000047
455
+ 2023-08-17 16:09:15,261 epoch 8 - iter 240/243 - loss 0.34875804 - time (sec): 447.64 - samples/sec: 173.22 - lr: 0.000047
456
+ 2023-08-17 16:09:20,506 ----------------------------------------------------------------------------------------------------
457
+ 2023-08-17 16:09:20,506 EPOCH 8 done: loss 0.3496 - lr 0.000047
458
+ 2023-08-17 16:09:22,212 Evaluating as a multi-label problem: False
459
+ 2023-08-17 16:09:22,253 DEV : loss 0.13401205837726593 - f1-score (micro avg) 0.9752
460
+ 2023-08-17 16:09:22,263 saving best model
461
+ 2023-08-17 16:09:25,549 ----------------------------------------------------------------------------------------------------
462
+ 2023-08-17 16:10:10,381 epoch 9 - iter 24/243 - loss 0.33211277 - time (sec): 44.83 - samples/sec: 175.99 - lr: 0.000047
463
+ 2023-08-17 16:10:55,480 epoch 9 - iter 48/243 - loss 0.33508629 - time (sec): 89.93 - samples/sec: 179.50 - lr: 0.000046
464
+ 2023-08-17 16:11:40,218 epoch 9 - iter 72/243 - loss 0.32662985 - time (sec): 134.67 - samples/sec: 176.48 - lr: 0.000046
465
+ 2023-08-17 16:12:25,005 epoch 9 - iter 96/243 - loss 0.32958645 - time (sec): 179.46 - samples/sec: 175.54 - lr: 0.000046
466
+ 2023-08-17 16:13:09,727 epoch 9 - iter 120/243 - loss 0.32364185 - time (sec): 224.18 - samples/sec: 174.64 - lr: 0.000046
467
+ 2023-08-17 16:13:54,450 epoch 9 - iter 144/243 - loss 0.32701429 - time (sec): 268.90 - samples/sec: 173.84 - lr: 0.000046
468
+ 2023-08-17 16:14:39,389 epoch 9 - iter 168/243 - loss 0.33017416 - time (sec): 313.84 - samples/sec: 173.66 - lr: 0.000046
469
+ 2023-08-17 16:15:24,407 epoch 9 - iter 192/243 - loss 0.33104299 - time (sec): 358.86 - samples/sec: 174.42 - lr: 0.000046
470
+ 2023-08-17 16:16:09,180 epoch 9 - iter 216/243 - loss 0.33454509 - time (sec): 403.63 - samples/sec: 174.12 - lr: 0.000046
471
+ 2023-08-17 16:16:55,391 epoch 9 - iter 240/243 - loss 0.33386278 - time (sec): 449.84 - samples/sec: 172.79 - lr: 0.000046
472
+ 2023-08-17 16:17:00,936 ----------------------------------------------------------------------------------------------------
473
+ 2023-08-17 16:17:00,936 EPOCH 9 done: loss 0.3329 - lr 0.000046
474
+ 2023-08-17 16:17:02,693 Evaluating as a multi-label problem: False
475
+ 2023-08-17 16:17:02,735 DEV : loss 0.14190562069416046 - f1-score (micro avg) 0.9764
476
+ 2023-08-17 16:17:02,745 saving best model
477
+ 2023-08-17 16:17:06,156 ----------------------------------------------------------------------------------------------------
478
+ 2023-08-17 16:17:50,936 epoch 10 - iter 24/243 - loss 0.34002265 - time (sec): 44.78 - samples/sec: 171.44 - lr: 0.000045
479
+ 2023-08-17 16:18:35,871 epoch 10 - iter 48/243 - loss 0.33540108 - time (sec): 89.72 - samples/sec: 172.81 - lr: 0.000045
480
+ 2023-08-17 16:19:20,808 epoch 10 - iter 72/243 - loss 0.33399184 - time (sec): 134.65 - samples/sec: 173.73 - lr: 0.000045
481
+ 2023-08-17 16:20:05,630 epoch 10 - iter 96/243 - loss 0.32469492 - time (sec): 179.47 - samples/sec: 173.88 - lr: 0.000045
482
+ 2023-08-17 16:20:50,458 epoch 10 - iter 120/243 - loss 0.32910415 - time (sec): 224.30 - samples/sec: 173.23 - lr: 0.000045
483
+ 2023-08-17 16:21:35,258 epoch 10 - iter 144/243 - loss 0.32899582 - time (sec): 269.10 - samples/sec: 173.34 - lr: 0.000045
484
+ 2023-08-17 16:22:20,259 epoch 10 - iter 168/243 - loss 0.33093813 - time (sec): 314.10 - samples/sec: 174.21 - lr: 0.000045
485
+ 2023-08-17 16:23:04,924 epoch 10 - iter 192/243 - loss 0.33208597 - time (sec): 358.77 - samples/sec: 173.56 - lr: 0.000045
486
+ 2023-08-17 16:23:49,840 epoch 10 - iter 216/243 - loss 0.33175324 - time (sec): 403.68 - samples/sec: 173.87 - lr: 0.000045
487
+ 2023-08-17 16:24:34,516 epoch 10 - iter 240/243 - loss 0.33262740 - time (sec): 448.36 - samples/sec: 173.38 - lr: 0.000044
488
+ 2023-08-17 16:24:39,626 ----------------------------------------------------------------------------------------------------
489
+ 2023-08-17 16:24:39,626 EPOCH 10 done: loss 0.3321 - lr 0.000044
490
+ 2023-08-17 16:24:41,766 Evaluating as a multi-label problem: False
491
+ 2023-08-17 16:24:41,807 DEV : loss 0.1481310874223709 - f1-score (micro avg) 0.9734
492
+ 2023-08-17 16:24:41,817 ----------------------------------------------------------------------------------------------------
493
+ 2023-08-17 16:25:26,591 epoch 11 - iter 24/243 - loss 0.33230355 - time (sec): 44.77 - samples/sec: 172.33 - lr: 0.000044
494
+ 2023-08-17 16:26:11,224 epoch 11 - iter 48/243 - loss 0.32441123 - time (sec): 89.41 - samples/sec: 171.48 - lr: 0.000044
495
+ 2023-08-17 16:26:56,065 epoch 11 - iter 72/243 - loss 0.32514673 - time (sec): 134.25 - samples/sec: 173.67 - lr: 0.000044
496
+ 2023-08-17 16:27:40,891 epoch 11 - iter 96/243 - loss 0.32235685 - time (sec): 179.07 - samples/sec: 174.42 - lr: 0.000044
497
+ 2023-08-17 16:28:25,572 epoch 11 - iter 120/243 - loss 0.31705674 - time (sec): 223.76 - samples/sec: 174.02 - lr: 0.000044
498
+ 2023-08-17 16:29:10,293 epoch 11 - iter 144/243 - loss 0.31351156 - time (sec): 268.48 - samples/sec: 173.36 - lr: 0.000044
499
+ 2023-08-17 16:29:55,140 epoch 11 - iter 168/243 - loss 0.31453443 - time (sec): 313.32 - samples/sec: 173.48 - lr: 0.000044
500
+ 2023-08-17 16:30:40,066 epoch 11 - iter 192/243 - loss 0.32048855 - time (sec): 358.25 - samples/sec: 174.16 - lr: 0.000044
501
+ 2023-08-17 16:31:24,762 epoch 11 - iter 216/243 - loss 0.31914298 - time (sec): 402.94 - samples/sec: 174.04 - lr: 0.000043
502
+ 2023-08-17 16:32:09,513 epoch 11 - iter 240/243 - loss 0.31938530 - time (sec): 447.70 - samples/sec: 173.67 - lr: 0.000043
503
+ 2023-08-17 16:32:14,601 ----------------------------------------------------------------------------------------------------
504
+ 2023-08-17 16:32:14,601 EPOCH 11 done: loss 0.3201 - lr 0.000043
505
+ 2023-08-17 16:32:16,321 Evaluating as a multi-label problem: False
506
+ 2023-08-17 16:32:16,363 DEV : loss 0.16022486984729767 - f1-score (micro avg) 0.9744
507
+ 2023-08-17 16:32:16,373 ----------------------------------------------------------------------------------------------------
508
+ 2023-08-17 16:33:01,344 epoch 12 - iter 24/243 - loss 0.30634651 - time (sec): 44.97 - samples/sec: 171.07 - lr: 0.000043
509
+ 2023-08-17 16:33:46,129 epoch 12 - iter 48/243 - loss 0.32055500 - time (sec): 89.76 - samples/sec: 170.39 - lr: 0.000043
510
+ 2023-08-17 16:34:31,093 epoch 12 - iter 72/243 - loss 0.31591461 - time (sec): 134.72 - samples/sec: 170.64 - lr: 0.000043
511
+ 2023-08-17 16:35:16,128 epoch 12 - iter 96/243 - loss 0.31720616 - time (sec): 179.75 - samples/sec: 171.27 - lr: 0.000043
512
+ 2023-08-17 16:36:01,471 epoch 12 - iter 120/243 - loss 0.31877634 - time (sec): 225.10 - samples/sec: 171.89 - lr: 0.000043
513
+ 2023-08-17 16:36:46,609 epoch 12 - iter 144/243 - loss 0.31817728 - time (sec): 270.24 - samples/sec: 172.07 - lr: 0.000043
514
+ 2023-08-17 16:37:31,814 epoch 12 - iter 168/243 - loss 0.31409341 - time (sec): 315.44 - samples/sec: 172.47 - lr: 0.000043
515
+ 2023-08-17 16:38:16,688 epoch 12 - iter 192/243 - loss 0.31475214 - time (sec): 360.31 - samples/sec: 172.05 - lr: 0.000042
516
+ 2023-08-17 16:39:01,927 epoch 12 - iter 216/243 - loss 0.31439205 - time (sec): 405.55 - samples/sec: 172.30 - lr: 0.000042
517
+ 2023-08-17 16:39:47,113 epoch 12 - iter 240/243 - loss 0.31462372 - time (sec): 450.74 - samples/sec: 172.47 - lr: 0.000042
518
+ 2023-08-17 16:39:52,253 ----------------------------------------------------------------------------------------------------
519
+ 2023-08-17 16:39:52,253 EPOCH 12 done: loss 0.3146 - lr 0.000042
520
+ 2023-08-17 16:39:54,063 Evaluating as a multi-label problem: False
521
+ 2023-08-17 16:39:54,111 DEV : loss 0.17038877308368683 - f1-score (micro avg) 0.9764
522
+ 2023-08-17 16:39:54,122 ----------------------------------------------------------------------------------------------------
523
+ 2023-08-17 16:40:38,914 epoch 13 - iter 24/243 - loss 0.30871471 - time (sec): 44.79 - samples/sec: 167.78 - lr: 0.000042
524
+ 2023-08-17 16:41:23,903 epoch 13 - iter 48/243 - loss 0.30951571 - time (sec): 89.78 - samples/sec: 168.30 - lr: 0.000042
525
+ 2023-08-17 16:42:08,900 epoch 13 - iter 72/243 - loss 0.30146253 - time (sec): 134.78 - samples/sec: 169.58 - lr: 0.000042
526
+ 2023-08-17 16:42:53,948 epoch 13 - iter 96/243 - loss 0.29818491 - time (sec): 179.83 - samples/sec: 170.69 - lr: 0.000042
527
+ 2023-08-17 16:43:38,884 epoch 13 - iter 120/243 - loss 0.29829818 - time (sec): 224.76 - samples/sec: 170.52 - lr: 0.000042
528
+ 2023-08-17 16:44:24,073 epoch 13 - iter 144/243 - loss 0.31111593 - time (sec): 269.95 - samples/sec: 170.84 - lr: 0.000042
529
+ 2023-08-17 16:45:09,241 epoch 13 - iter 168/243 - loss 0.31147702 - time (sec): 315.12 - samples/sec: 171.02 - lr: 0.000041
530
+ 2023-08-17 16:45:54,422 epoch 13 - iter 192/243 - loss 0.30976085 - time (sec): 360.30 - samples/sec: 171.93 - lr: 0.000041
531
+ 2023-08-17 16:46:39,668 epoch 13 - iter 216/243 - loss 0.30904370 - time (sec): 405.55 - samples/sec: 172.11 - lr: 0.000041
532
+ 2023-08-17 16:47:24,802 epoch 13 - iter 240/243 - loss 0.30572837 - time (sec): 450.68 - samples/sec: 172.49 - lr: 0.000041
533
+ 2023-08-17 16:47:29,930 ----------------------------------------------------------------------------------------------------
534
+ 2023-08-17 16:47:29,930 EPOCH 13 done: loss 0.3056 - lr 0.000041
535
+ 2023-08-17 16:47:31,653 Evaluating as a multi-label problem: False
536
+ 2023-08-17 16:47:31,695 DEV : loss 0.16180633008480072 - f1-score (micro avg) 0.9766
537
+ 2023-08-17 16:47:31,705 saving best model
538
+ 2023-08-17 16:47:34,973 ----------------------------------------------------------------------------------------------------
539
+ 2023-08-17 16:48:19,564 epoch 14 - iter 24/243 - loss 0.28577045 - time (sec): 44.59 - samples/sec: 174.54 - lr: 0.000041
540
+ 2023-08-17 16:49:04,478 epoch 14 - iter 48/243 - loss 0.28369661 - time (sec): 89.50 - samples/sec: 172.66 - lr: 0.000041
541
+ 2023-08-17 16:49:49,761 epoch 14 - iter 72/243 - loss 0.29071442 - time (sec): 134.79 - samples/sec: 172.98 - lr: 0.000041
542
+ 2023-08-17 16:50:35,252 epoch 14 - iter 96/243 - loss 0.29219267 - time (sec): 180.28 - samples/sec: 174.45 - lr: 0.000041
543
+ 2023-08-17 16:51:20,403 epoch 14 - iter 120/243 - loss 0.29452027 - time (sec): 225.43 - samples/sec: 173.61 - lr: 0.000041
544
+ 2023-08-17 16:52:04,985 epoch 14 - iter 144/243 - loss 0.28860385 - time (sec): 270.01 - samples/sec: 173.32 - lr: 0.000040
545
+ 2023-08-17 16:52:50,260 epoch 14 - iter 168/243 - loss 0.29040567 - time (sec): 315.29 - samples/sec: 173.20 - lr: 0.000040
546
+ 2023-08-17 16:53:35,397 epoch 14 - iter 192/243 - loss 0.29057669 - time (sec): 360.42 - samples/sec: 173.11 - lr: 0.000040
547
+ 2023-08-17 16:54:20,056 epoch 14 - iter 216/243 - loss 0.29351512 - time (sec): 405.08 - samples/sec: 173.19 - lr: 0.000040
548
+ 2023-08-17 16:55:04,885 epoch 14 - iter 240/243 - loss 0.29475470 - time (sec): 449.91 - samples/sec: 172.96 - lr: 0.000040
549
+ 2023-08-17 16:55:09,989 ----------------------------------------------------------------------------------------------------
550
+ 2023-08-17 16:55:09,989 EPOCH 14 done: loss 0.2946 - lr 0.000040
551
+ 2023-08-17 16:55:11,713 Evaluating as a multi-label problem: False
552
+ 2023-08-17 16:55:11,755 DEV : loss 0.1961415857076645 - f1-score (micro avg) 0.9729
553
+ 2023-08-17 16:55:11,765 ----------------------------------------------------------------------------------------------------
554
+ 2023-08-17 16:55:56,753 epoch 15 - iter 24/243 - loss 0.32628632 - time (sec): 44.99 - samples/sec: 171.87 - lr: 0.000040
555
+ 2023-08-17 16:56:41,837 epoch 15 - iter 48/243 - loss 0.30408958 - time (sec): 90.07 - samples/sec: 172.63 - lr: 0.000040
556
+ 2023-08-17 16:57:27,089 epoch 15 - iter 72/243 - loss 0.29750206 - time (sec): 135.32 - samples/sec: 173.46 - lr: 0.000040
557
+ 2023-08-17 16:58:12,226 epoch 15 - iter 96/243 - loss 0.29760832 - time (sec): 180.46 - samples/sec: 172.58 - lr: 0.000040
558
+ 2023-08-17 16:58:57,529 epoch 15 - iter 120/243 - loss 0.29974418 - time (sec): 225.76 - samples/sec: 172.39 - lr: 0.000039
559
+ 2023-08-17 16:59:42,724 epoch 15 - iter 144/243 - loss 0.29904887 - time (sec): 270.96 - samples/sec: 172.12 - lr: 0.000039
560
+ 2023-08-17 17:00:27,857 epoch 15 - iter 168/243 - loss 0.29894209 - time (sec): 316.09 - samples/sec: 172.07 - lr: 0.000039
561
+ 2023-08-17 17:01:12,968 epoch 15 - iter 192/243 - loss 0.29754010 - time (sec): 361.20 - samples/sec: 172.12 - lr: 0.000039
562
+ 2023-08-17 17:01:58,260 epoch 15 - iter 216/243 - loss 0.29884402 - time (sec): 406.49 - samples/sec: 171.83 - lr: 0.000039
563
+ 2023-08-17 17:02:43,469 epoch 15 - iter 240/243 - loss 0.29706337 - time (sec): 451.70 - samples/sec: 172.03 - lr: 0.000039
564
+ 2023-08-17 17:02:48,630 ----------------------------------------------------------------------------------------------------
565
+ 2023-08-17 17:02:48,630 EPOCH 15 done: loss 0.2971 - lr 0.000039
566
+ 2023-08-17 17:02:50,353 Evaluating as a multi-label problem: False
567
+ 2023-08-17 17:02:50,395 DEV : loss 0.21415923535823822 - f1-score (micro avg) 0.9737
568
+ 2023-08-17 17:02:50,405 ----------------------------------------------------------------------------------------------------
569
+ 2023-08-17 17:03:35,348 epoch 16 - iter 24/243 - loss 0.32918671 - time (sec): 44.94 - samples/sec: 172.49 - lr: 0.000039
570
+ 2023-08-17 17:04:20,329 epoch 16 - iter 48/243 - loss 0.30668793 - time (sec): 89.92 - samples/sec: 171.87 - lr: 0.000039
571
+ 2023-08-17 17:05:05,277 epoch 16 - iter 72/243 - loss 0.30165600 - time (sec): 134.87 - samples/sec: 171.50 - lr: 0.000039
572
+ 2023-08-17 17:05:50,490 epoch 16 - iter 96/243 - loss 0.29977956 - time (sec): 180.08 - samples/sec: 172.09 - lr: 0.000038
573
+ 2023-08-17 17:06:35,472 epoch 16 - iter 120/243 - loss 0.29035278 - time (sec): 225.07 - samples/sec: 171.91 - lr: 0.000038
574
+ 2023-08-17 17:07:20,567 epoch 16 - iter 144/243 - loss 0.28688344 - time (sec): 270.16 - samples/sec: 172.44 - lr: 0.000038
575
+ 2023-08-17 17:08:05,656 epoch 16 - iter 168/243 - loss 0.28573744 - time (sec): 315.25 - samples/sec: 172.62 - lr: 0.000038
576
+ 2023-08-17 17:08:50,717 epoch 16 - iter 192/243 - loss 0.28483557 - time (sec): 360.31 - samples/sec: 172.35 - lr: 0.000038
577
+ 2023-08-17 17:09:35,976 epoch 16 - iter 216/243 - loss 0.28487700 - time (sec): 405.57 - samples/sec: 172.52 - lr: 0.000038
578
+ 2023-08-17 17:10:21,036 epoch 16 - iter 240/243 - loss 0.28570848 - time (sec): 450.63 - samples/sec: 172.55 - lr: 0.000038
579
+ 2023-08-17 17:10:26,150 ----------------------------------------------------------------------------------------------------
580
+ 2023-08-17 17:10:26,150 EPOCH 16 done: loss 0.2858 - lr 0.000038
581
+ 2023-08-17 17:10:27,872 Evaluating as a multi-label problem: False
582
+ 2023-08-17 17:10:27,914 DEV : loss 0.17488490045070648 - f1-score (micro avg) 0.9764
583
+ 2023-08-17 17:10:27,925 ----------------------------------------------------------------------------------------------------
584
+ 2023-08-17 17:11:13,057 epoch 17 - iter 24/243 - loss 0.28223418 - time (sec): 45.13 - samples/sec: 168.04 - lr: 0.000038
585
+ 2023-08-17 17:11:58,333 epoch 17 - iter 48/243 - loss 0.28773045 - time (sec): 90.41 - samples/sec: 168.82 - lr: 0.000038
586
+ 2023-08-17 17:12:43,743 epoch 17 - iter 72/243 - loss 0.28949629 - time (sec): 135.82 - samples/sec: 170.46 - lr: 0.000037
587
+ 2023-08-17 17:13:29,045 epoch 17 - iter 96/243 - loss 0.29081122 - time (sec): 181.12 - samples/sec: 171.25 - lr: 0.000037
588
+ 2023-08-17 17:14:14,415 epoch 17 - iter 120/243 - loss 0.28910214 - time (sec): 226.49 - samples/sec: 171.37 - lr: 0.000037
589
+ 2023-08-17 17:14:59,692 epoch 17 - iter 144/243 - loss 0.28813940 - time (sec): 271.77 - samples/sec: 172.03 - lr: 0.000037
590
+ 2023-08-17 17:15:44,880 epoch 17 - iter 168/243 - loss 0.28649377 - time (sec): 316.96 - samples/sec: 172.13 - lr: 0.000037
591
+ 2023-08-17 17:16:30,227 epoch 17 - iter 192/243 - loss 0.28690817 - time (sec): 362.30 - samples/sec: 172.49 - lr: 0.000037
592
+ 2023-08-17 17:17:15,249 epoch 17 - iter 216/243 - loss 0.28529445 - time (sec): 407.32 - samples/sec: 171.86 - lr: 0.000037
593
+ 2023-08-17 17:18:00,098 epoch 17 - iter 240/243 - loss 0.28495055 - time (sec): 452.17 - samples/sec: 171.78 - lr: 0.000037
594
+ 2023-08-17 17:18:05,257 ----------------------------------------------------------------------------------------------------
595
+ 2023-08-17 17:18:05,257 EPOCH 17 done: loss 0.2845 - lr 0.000037
596
+ 2023-08-17 17:18:06,980 Evaluating as a multi-label problem: False
597
+ 2023-08-17 17:18:07,022 DEV : loss 0.1961992233991623 - f1-score (micro avg) 0.9764
598
+ 2023-08-17 17:18:07,032 ----------------------------------------------------------------------------------------------------
599
+ 2023-08-17 17:18:52,513 epoch 18 - iter 24/243 - loss 0.28778804 - time (sec): 45.48 - samples/sec: 182.80 - lr: 0.000037
600
+ 2023-08-17 17:19:37,661 epoch 18 - iter 48/243 - loss 0.28633144 - time (sec): 90.63 - samples/sec: 176.20 - lr: 0.000036
601
+ 2023-08-17 17:20:22,553 epoch 18 - iter 72/243 - loss 0.28829018 - time (sec): 135.52 - samples/sec: 174.75 - lr: 0.000036
602
+ 2023-08-17 17:21:07,965 epoch 18 - iter 96/243 - loss 0.28737825 - time (sec): 180.93 - samples/sec: 174.76 - lr: 0.000036
603
+ 2023-08-17 17:21:53,184 epoch 18 - iter 120/243 - loss 0.28870528 - time (sec): 226.15 - samples/sec: 175.04 - lr: 0.000036
604
+ 2023-08-17 17:22:38,349 epoch 18 - iter 144/243 - loss 0.28536506 - time (sec): 271.32 - samples/sec: 174.53 - lr: 0.000036
605
+ 2023-08-17 17:23:23,514 epoch 18 - iter 168/243 - loss 0.28612314 - time (sec): 316.48 - samples/sec: 174.07 - lr: 0.000036
606
+ 2023-08-17 17:24:08,584 epoch 18 - iter 192/243 - loss 0.28681958 - time (sec): 361.55 - samples/sec: 173.33 - lr: 0.000036
607
+ 2023-08-17 17:24:53,654 epoch 18 - iter 216/243 - loss 0.28815101 - time (sec): 406.62 - samples/sec: 173.03 - lr: 0.000036
608
+ 2023-08-17 17:25:38,599 epoch 18 - iter 240/243 - loss 0.28697818 - time (sec): 451.57 - samples/sec: 172.30 - lr: 0.000036
609
+ 2023-08-17 17:25:43,717 ----------------------------------------------------------------------------------------------------
610
+ 2023-08-17 17:25:43,717 EPOCH 18 done: loss 0.2865 - lr 0.000036
611
+ 2023-08-17 17:25:45,959 Evaluating as a multi-label problem: False
612
+ 2023-08-17 17:25:46,000 DEV : loss 0.18113288283348083 - f1-score (micro avg) 0.9781
613
+ 2023-08-17 17:25:46,010 saving best model
614
+ 2023-08-17 17:25:49,383 ----------------------------------------------------------------------------------------------------
615
+ 2023-08-17 17:26:34,436 epoch 19 - iter 24/243 - loss 0.28138164 - time (sec): 45.05 - samples/sec: 176.28 - lr: 0.000036
616
+ 2023-08-17 17:27:19,265 epoch 19 - iter 48/243 - loss 0.28992986 - time (sec): 89.88 - samples/sec: 171.22 - lr: 0.000035
617
+ 2023-08-17 17:28:04,355 epoch 19 - iter 72/243 - loss 0.28244605 - time (sec): 134.97 - samples/sec: 171.95 - lr: 0.000035
618
+ 2023-08-17 17:28:49,083 epoch 19 - iter 96/243 - loss 0.28642854 - time (sec): 179.70 - samples/sec: 171.69 - lr: 0.000035
619
+ 2023-08-17 17:29:34,124 epoch 19 - iter 120/243 - loss 0.28768114 - time (sec): 224.74 - samples/sec: 171.98 - lr: 0.000035
620
+ 2023-08-17 17:30:19,241 epoch 19 - iter 144/243 - loss 0.28722806 - time (sec): 269.86 - samples/sec: 172.38 - lr: 0.000035
621
+ 2023-08-17 17:31:04,389 epoch 19 - iter 168/243 - loss 0.28477685 - time (sec): 315.01 - samples/sec: 172.83 - lr: 0.000035
622
+ 2023-08-17 17:31:49,556 epoch 19 - iter 192/243 - loss 0.28564618 - time (sec): 360.17 - samples/sec: 172.82 - lr: 0.000035
623
+ 2023-08-17 17:32:34,558 epoch 19 - iter 216/243 - loss 0.28166734 - time (sec): 405.17 - samples/sec: 172.45 - lr: 0.000035
624
+ 2023-08-17 17:33:19,570 epoch 19 - iter 240/243 - loss 0.28044622 - time (sec): 450.19 - samples/sec: 172.59 - lr: 0.000035
625
+ 2023-08-17 17:33:24,717 ----------------------------------------------------------------------------------------------------
626
+ 2023-08-17 17:33:24,718 EPOCH 19 done: loss 0.2808 - lr 0.000035
627
+ 2023-08-17 17:33:26,436 Evaluating as a multi-label problem: False
628
+ 2023-08-17 17:33:26,478 DEV : loss 0.2043328434228897 - f1-score (micro avg) 0.9793
629
+ 2023-08-17 17:33:26,488 saving best model
630
+ 2023-08-17 17:33:29,764 ----------------------------------------------------------------------------------------------------
631
+ 2023-08-17 17:34:14,947 epoch 20 - iter 24/243 - loss 0.28666954 - time (sec): 45.18 - samples/sec: 172.37 - lr: 0.000034
632
+ 2023-08-17 17:35:00,164 epoch 20 - iter 48/243 - loss 0.29481761 - time (sec): 90.40 - samples/sec: 170.59 - lr: 0.000034
633
+ 2023-08-17 17:35:45,493 epoch 20 - iter 72/243 - loss 0.29914317 - time (sec): 135.73 - samples/sec: 170.85 - lr: 0.000034
634
+ 2023-08-17 17:36:30,645 epoch 20 - iter 96/243 - loss 0.29393948 - time (sec): 180.88 - samples/sec: 170.43 - lr: 0.000034
635
+ 2023-08-17 17:37:16,256 epoch 20 - iter 120/243 - loss 0.29259273 - time (sec): 226.49 - samples/sec: 170.21 - lr: 0.000034
636
+ 2023-08-17 17:38:01,739 epoch 20 - iter 144/243 - loss 0.29189521 - time (sec): 271.97 - samples/sec: 170.83 - lr: 0.000034
637
+ 2023-08-17 17:38:47,133 epoch 20 - iter 168/243 - loss 0.29174956 - time (sec): 317.37 - samples/sec: 171.42 - lr: 0.000034
638
+ 2023-08-17 17:39:32,336 epoch 20 - iter 192/243 - loss 0.28991116 - time (sec): 362.57 - samples/sec: 171.20 - lr: 0.000034
639
+ 2023-08-17 17:40:17,647 epoch 20 - iter 216/243 - loss 0.28908421 - time (sec): 407.88 - samples/sec: 170.75 - lr: 0.000034
640
+ 2023-08-17 17:41:03,056 epoch 20 - iter 240/243 - loss 0.28802142 - time (sec): 453.29 - samples/sec: 171.60 - lr: 0.000033
641
+ 2023-08-17 17:41:08,187 ----------------------------------------------------------------------------------------------------
642
+ 2023-08-17 17:41:08,187 EPOCH 20 done: loss 0.2884 - lr 0.000033
643
+ 2023-08-17 17:41:09,907 Evaluating as a multi-label problem: False
644
+ 2023-08-17 17:41:09,950 DEV : loss 0.17976026237010956 - f1-score (micro avg) 0.9798
645
+ 2023-08-17 17:41:09,960 saving best model
646
+ 2023-08-17 17:41:13,247 ----------------------------------------------------------------------------------------------------
647
+ 2023-08-17 17:41:58,342 epoch 21 - iter 24/243 - loss 0.27074814 - time (sec): 45.09 - samples/sec: 173.32 - lr: 0.000033
648
+ 2023-08-17 17:42:43,633 epoch 21 - iter 48/243 - loss 0.27757152 - time (sec): 90.39 - samples/sec: 170.96 - lr: 0.000033
649
+ 2023-08-17 17:43:28,902 epoch 21 - iter 72/243 - loss 0.27454337 - time (sec): 135.65 - samples/sec: 170.88 - lr: 0.000033
650
+ 2023-08-17 17:44:14,170 epoch 21 - iter 96/243 - loss 0.27609707 - time (sec): 180.92 - samples/sec: 170.54 - lr: 0.000033
651
+ 2023-08-17 17:44:59,479 epoch 21 - iter 120/243 - loss 0.27224083 - time (sec): 226.23 - samples/sec: 170.65 - lr: 0.000033
652
+ 2023-08-17 17:45:44,872 epoch 21 - iter 144/243 - loss 0.27850149 - time (sec): 271.63 - samples/sec: 170.36 - lr: 0.000033
653
+ 2023-08-17 17:46:30,164 epoch 21 - iter 168/243 - loss 0.27696398 - time (sec): 316.92 - samples/sec: 170.36 - lr: 0.000033
654
+ 2023-08-17 17:47:15,510 epoch 21 - iter 192/243 - loss 0.27664755 - time (sec): 362.26 - samples/sec: 170.35 - lr: 0.000033
655
+ 2023-08-17 17:48:00,766 epoch 21 - iter 216/243 - loss 0.27558848 - time (sec): 407.52 - samples/sec: 171.41 - lr: 0.000032
656
+ 2023-08-17 17:48:46,468 epoch 21 - iter 240/243 - loss 0.27583214 - time (sec): 453.22 - samples/sec: 171.49 - lr: 0.000032
657
+ 2023-08-17 17:48:51,686 ----------------------------------------------------------------------------------------------------
658
+ 2023-08-17 17:48:51,687 EPOCH 21 done: loss 0.2761 - lr 0.000032
659
+ 2023-08-17 17:48:53,412 Evaluating as a multi-label problem: False
660
+ 2023-08-17 17:48:53,454 DEV : loss 0.20532046258449554 - f1-score (micro avg) 0.9808
661
+ 2023-08-17 17:48:53,465 saving best model
662
+ 2023-08-17 17:48:56,721 ----------------------------------------------------------------------------------------------------
663
+ 2023-08-17 17:49:41,659 epoch 22 - iter 24/243 - loss 0.27909847 - time (sec): 44.94 - samples/sec: 171.95 - lr: 0.000032
664
+ 2023-08-17 17:50:27,390 epoch 22 - iter 48/243 - loss 0.27692541 - time (sec): 90.67 - samples/sec: 173.72 - lr: 0.000032
665
+ 2023-08-17 17:51:12,644 epoch 22 - iter 72/243 - loss 0.27632545 - time (sec): 135.92 - samples/sec: 173.33 - lr: 0.000032
666
+ 2023-08-17 17:51:57,937 epoch 22 - iter 96/243 - loss 0.27607549 - time (sec): 181.22 - samples/sec: 173.34 - lr: 0.000032
667
+ 2023-08-17 17:52:43,266 epoch 22 - iter 120/243 - loss 0.27687957 - time (sec): 226.54 - samples/sec: 173.26 - lr: 0.000032
668
+ 2023-08-17 17:53:28,542 epoch 22 - iter 144/243 - loss 0.27294774 - time (sec): 271.82 - samples/sec: 172.77 - lr: 0.000032
669
+ 2023-08-17 17:54:14,153 epoch 22 - iter 168/243 - loss 0.27391471 - time (sec): 317.43 - samples/sec: 171.73 - lr: 0.000032
670
+ 2023-08-17 17:54:59,580 epoch 22 - iter 192/243 - loss 0.27352263 - time (sec): 362.86 - samples/sec: 171.50 - lr: 0.000031
671
+ 2023-08-17 17:55:44,989 epoch 22 - iter 216/243 - loss 0.27144978 - time (sec): 408.27 - samples/sec: 171.53 - lr: 0.000031
672
+ 2023-08-17 17:56:30,411 epoch 22 - iter 240/243 - loss 0.27338785 - time (sec): 453.69 - samples/sec: 171.29 - lr: 0.000031
673
+ 2023-08-17 17:56:35,566 ----------------------------------------------------------------------------------------------------
674
+ 2023-08-17 17:56:35,566 EPOCH 22 done: loss 0.2738 - lr 0.000031
675
+ 2023-08-17 17:56:37,289 Evaluating as a multi-label problem: False
676
+ 2023-08-17 17:56:37,331 DEV : loss 0.20975473523139954 - f1-score (micro avg) 0.9771
677
+ 2023-08-17 17:56:37,342 ----------------------------------------------------------------------------------------------------
678
+ 2023-08-17 17:57:22,601 epoch 23 - iter 24/243 - loss 0.28534317 - time (sec): 45.26 - samples/sec: 174.33 - lr: 0.000031
679
+ 2023-08-17 17:58:07,820 epoch 23 - iter 48/243 - loss 0.28084455 - time (sec): 90.48 - samples/sec: 171.76 - lr: 0.000031
680
+ 2023-08-17 17:58:53,169 epoch 23 - iter 72/243 - loss 0.28011749 - time (sec): 135.83 - samples/sec: 171.77 - lr: 0.000031
681
+ 2023-08-17 17:59:38,445 epoch 23 - iter 96/243 - loss 0.28443955 - time (sec): 181.10 - samples/sec: 171.14 - lr: 0.000031
682
+ 2023-08-17 18:00:23,765 epoch 23 - iter 120/243 - loss 0.28290269 - time (sec): 226.42 - samples/sec: 171.89 - lr: 0.000031
683
+ 2023-08-17 18:01:08,928 epoch 23 - iter 144/243 - loss 0.28079246 - time (sec): 271.59 - samples/sec: 171.41 - lr: 0.000031
684
+ 2023-08-17 18:01:54,100 epoch 23 - iter 168/243 - loss 0.27982769 - time (sec): 316.76 - samples/sec: 172.02 - lr: 0.000030
685
+ 2023-08-17 18:02:39,443 epoch 23 - iter 192/243 - loss 0.27685678 - time (sec): 362.10 - samples/sec: 171.50 - lr: 0.000030
686
+ 2023-08-17 18:03:24,560 epoch 23 - iter 216/243 - loss 0.27359946 - time (sec): 407.22 - samples/sec: 172.03 - lr: 0.000030
687
+ 2023-08-17 18:04:09,622 epoch 23 - iter 240/243 - loss 0.27378796 - time (sec): 452.28 - samples/sec: 171.74 - lr: 0.000030
688
+ 2023-08-17 18:04:14,868 ----------------------------------------------------------------------------------------------------
689
+ 2023-08-17 18:04:14,868 EPOCH 23 done: loss 0.2739 - lr 0.000030
690
+ 2023-08-17 18:04:16,589 Evaluating as a multi-label problem: False
691
+ 2023-08-17 18:04:16,631 DEV : loss 0.21456189453601837 - f1-score (micro avg) 0.9796
692
+ 2023-08-17 18:04:16,641 ----------------------------------------------------------------------------------------------------
693
+ 2023-08-17 18:05:01,650 epoch 24 - iter 24/243 - loss 0.28123621 - time (sec): 45.01 - samples/sec: 167.23 - lr: 0.000030
694
+ 2023-08-17 18:05:46,771 epoch 24 - iter 48/243 - loss 0.27128197 - time (sec): 90.13 - samples/sec: 167.10 - lr: 0.000030
695
+ 2023-08-17 18:06:31,820 epoch 24 - iter 72/243 - loss 0.26742573 - time (sec): 135.18 - samples/sec: 167.98 - lr: 0.000030
696
+ 2023-08-17 18:07:16,912 epoch 24 - iter 96/243 - loss 0.27426501 - time (sec): 180.27 - samples/sec: 168.45 - lr: 0.000030
697
+ 2023-08-17 18:08:02,205 epoch 24 - iter 120/243 - loss 0.26958800 - time (sec): 225.56 - samples/sec: 170.14 - lr: 0.000030
698
+ 2023-08-17 18:08:47,649 epoch 24 - iter 144/243 - loss 0.27011544 - time (sec): 271.01 - samples/sec: 172.38 - lr: 0.000029
699
+ 2023-08-17 18:09:32,737 epoch 24 - iter 168/243 - loss 0.26573691 - time (sec): 316.10 - samples/sec: 172.12 - lr: 0.000029
700
+ 2023-08-17 18:10:17,866 epoch 24 - iter 192/243 - loss 0.26424698 - time (sec): 361.22 - samples/sec: 172.18 - lr: 0.000029
701
+ 2023-08-17 18:11:04,671 epoch 24 - iter 216/243 - loss 0.26555746 - time (sec): 408.03 - samples/sec: 171.04 - lr: 0.000029
702
+ 2023-08-17 18:11:53,686 epoch 24 - iter 240/243 - loss 0.26918457 - time (sec): 457.05 - samples/sec: 169.93 - lr: 0.000029
703
+ 2023-08-17 18:11:59,322 ----------------------------------------------------------------------------------------------------
704
+ 2023-08-17 18:11:59,322 EPOCH 24 done: loss 0.2696 - lr 0.000029
705
+ 2023-08-17 18:12:01,087 Evaluating as a multi-label problem: False
706
+ 2023-08-17 18:12:01,129 DEV : loss 0.21408958733081818 - f1-score (micro avg) 0.9788
707
+ 2023-08-17 18:12:01,139 ----------------------------------------------------------------------------------------------------
708
+ 2023-08-17 18:12:46,196 epoch 25 - iter 24/243 - loss 0.26057600 - time (sec): 45.06 - samples/sec: 173.60 - lr: 0.000029
709
+ 2023-08-17 18:13:31,223 epoch 25 - iter 48/243 - loss 0.25988897 - time (sec): 90.08 - samples/sec: 173.36 - lr: 0.000029
710
+ 2023-08-17 18:14:16,253 epoch 25 - iter 72/243 - loss 0.26336622 - time (sec): 135.11 - samples/sec: 173.77 - lr: 0.000029
711
+ 2023-08-17 18:15:01,325 epoch 25 - iter 96/243 - loss 0.26126366 - time (sec): 180.19 - samples/sec: 174.18 - lr: 0.000029
712
+ 2023-08-17 18:15:46,328 epoch 25 - iter 120/243 - loss 0.26114761 - time (sec): 225.19 - samples/sec: 173.64 - lr: 0.000028
713
+ 2023-08-17 18:16:31,328 epoch 25 - iter 144/243 - loss 0.26019042 - time (sec): 270.19 - samples/sec: 173.30 - lr: 0.000028
714
+ 2023-08-17 18:17:17,105 epoch 25 - iter 168/243 - loss 0.26060643 - time (sec): 315.97 - samples/sec: 172.65 - lr: 0.000028
715
+ 2023-08-17 18:18:06,152 epoch 25 - iter 192/243 - loss 0.26158525 - time (sec): 365.01 - samples/sec: 171.24 - lr: 0.000028
716
+ 2023-08-17 18:18:55,027 epoch 25 - iter 216/243 - loss 0.25965178 - time (sec): 413.89 - samples/sec: 169.43 - lr: 0.000028
717
+ 2023-08-17 18:19:43,902 epoch 25 - iter 240/243 - loss 0.25991617 - time (sec): 462.76 - samples/sec: 168.12 - lr: 0.000028
718
+ 2023-08-17 18:19:49,428 ----------------------------------------------------------------------------------------------------
719
+ 2023-08-17 18:19:49,428 EPOCH 25 done: loss 0.2605 - lr 0.000028
720
+ 2023-08-17 18:19:51,139 Evaluating as a multi-label problem: False
721
+ 2023-08-17 18:19:51,180 DEV : loss 0.20778048038482666 - f1-score (micro avg) 0.9801
722
+ 2023-08-17 18:19:51,190 ----------------------------------------------------------------------------------------------------
723
+ 2023-08-17 18:20:35,881 epoch 26 - iter 24/243 - loss 0.25028245 - time (sec): 44.69 - samples/sec: 171.74 - lr: 0.000028
724
+ 2023-08-17 18:21:20,589 epoch 26 - iter 48/243 - loss 0.26759368 - time (sec): 89.40 - samples/sec: 173.91 - lr: 0.000028
725
+ 2023-08-17 18:22:05,046 epoch 26 - iter 72/243 - loss 0.26240750 - time (sec): 133.86 - samples/sec: 173.31 - lr: 0.000028
726
+ 2023-08-17 18:22:49,769 epoch 26 - iter 96/243 - loss 0.26499737 - time (sec): 178.58 - samples/sec: 173.74 - lr: 0.000027
727
+ 2023-08-17 18:23:34,328 epoch 26 - iter 120/243 - loss 0.26765442 - time (sec): 223.14 - samples/sec: 172.73 - lr: 0.000027
728
+ 2023-08-17 18:24:18,977 epoch 26 - iter 144/243 - loss 0.26496660 - time (sec): 267.79 - samples/sec: 173.02 - lr: 0.000027
729
+ 2023-08-17 18:25:03,720 epoch 26 - iter 168/243 - loss 0.26407033 - time (sec): 312.53 - samples/sec: 173.71 - lr: 0.000027
730
+ 2023-08-17 18:25:48,390 epoch 26 - iter 192/243 - loss 0.26463487 - time (sec): 357.20 - samples/sec: 173.79 - lr: 0.000027
731
+ 2023-08-17 18:26:33,167 epoch 26 - iter 216/243 - loss 0.26192074 - time (sec): 401.98 - samples/sec: 174.24 - lr: 0.000027
732
+ 2023-08-17 18:27:17,792 epoch 26 - iter 240/243 - loss 0.26299030 - time (sec): 446.60 - samples/sec: 174.06 - lr: 0.000027
733
+ 2023-08-17 18:27:22,878 ----------------------------------------------------------------------------------------------------
734
+ 2023-08-17 18:27:22,878 EPOCH 26 done: loss 0.2631 - lr 0.000027
735
+ 2023-08-17 18:27:24,593 Evaluating as a multi-label problem: False
736
+ 2023-08-17 18:27:24,634 DEV : loss 0.22401468455791473 - f1-score (micro avg) 0.9786
737
+ 2023-08-17 18:27:24,644 ----------------------------------------------------------------------------------------------------
738
+ 2023-08-17 18:28:09,713 epoch 27 - iter 24/243 - loss 0.26639657 - time (sec): 45.07 - samples/sec: 181.61 - lr: 0.000027
739
+ 2023-08-17 18:28:54,491 epoch 27 - iter 48/243 - loss 0.27451501 - time (sec): 89.85 - samples/sec: 177.51 - lr: 0.000027
740
+ 2023-08-17 18:29:39,240 epoch 27 - iter 72/243 - loss 0.27289399 - time (sec): 134.60 - samples/sec: 175.50 - lr: 0.000026
741
+ 2023-08-17 18:30:23,954 epoch 27 - iter 96/243 - loss 0.27091536 - time (sec): 179.31 - samples/sec: 174.22 - lr: 0.000026
742
+ 2023-08-17 18:31:08,555 epoch 27 - iter 120/243 - loss 0.27191898 - time (sec): 223.91 - samples/sec: 172.72 - lr: 0.000026
743
+ 2023-08-17 18:31:53,465 epoch 27 - iter 144/243 - loss 0.27013358 - time (sec): 268.82 - samples/sec: 172.72 - lr: 0.000026
744
+ 2023-08-17 18:32:38,347 epoch 27 - iter 168/243 - loss 0.26766038 - time (sec): 313.70 - samples/sec: 172.27 - lr: 0.000026
745
+ 2023-08-17 18:33:23,232 epoch 27 - iter 192/243 - loss 0.26602770 - time (sec): 358.59 - samples/sec: 172.78 - lr: 0.000026
746
+ 2023-08-17 18:34:08,007 epoch 27 - iter 216/243 - loss 0.26757355 - time (sec): 403.36 - samples/sec: 172.98 - lr: 0.000026
747
+ 2023-08-17 18:34:52,870 epoch 27 - iter 240/243 - loss 0.26544815 - time (sec): 448.23 - samples/sec: 173.38 - lr: 0.000026
748
+ 2023-08-17 18:34:57,998 ----------------------------------------------------------------------------------------------------
749
+ 2023-08-17 18:34:57,999 EPOCH 27 done: loss 0.2656 - lr 0.000026
750
+ 2023-08-17 18:35:00,163 Evaluating as a multi-label problem: False
751
+ 2023-08-17 18:35:00,204 DEV : loss 0.2324327975511551 - f1-score (micro avg) 0.9771
752
+ 2023-08-17 18:35:00,214 ----------------------------------------------------------------------------------------------------
753
+ 2023-08-17 18:35:45,057 epoch 28 - iter 24/243 - loss 0.26044359 - time (sec): 44.84 - samples/sec: 176.80 - lr: 0.000026
754
+ 2023-08-17 18:36:29,805 epoch 28 - iter 48/243 - loss 0.25192260 - time (sec): 89.59 - samples/sec: 174.98 - lr: 0.000025
755
+ 2023-08-17 18:37:14,636 epoch 28 - iter 72/243 - loss 0.24867911 - time (sec): 134.42 - samples/sec: 175.24 - lr: 0.000025
756
+ 2023-08-17 18:37:59,425 epoch 28 - iter 96/243 - loss 0.25204485 - time (sec): 179.21 - samples/sec: 175.04 - lr: 0.000025
757
+ 2023-08-17 18:38:44,217 epoch 28 - iter 120/243 - loss 0.24981817 - time (sec): 224.00 - samples/sec: 174.13 - lr: 0.000025
758
+ 2023-08-17 18:39:28,909 epoch 28 - iter 144/243 - loss 0.25157168 - time (sec): 268.69 - samples/sec: 173.71 - lr: 0.000025
759
+ 2023-08-17 18:40:13,658 epoch 28 - iter 168/243 - loss 0.25440998 - time (sec): 313.44 - samples/sec: 173.28 - lr: 0.000025
760
+ 2023-08-17 18:40:58,464 epoch 28 - iter 192/243 - loss 0.25791455 - time (sec): 358.25 - samples/sec: 173.25 - lr: 0.000025
761
+ 2023-08-17 18:41:43,198 epoch 28 - iter 216/243 - loss 0.26113615 - time (sec): 402.98 - samples/sec: 173.07 - lr: 0.000025
762
+ 2023-08-17 18:42:28,195 epoch 28 - iter 240/243 - loss 0.26254906 - time (sec): 447.98 - samples/sec: 173.70 - lr: 0.000025
763
+ 2023-08-17 18:42:33,262 ----------------------------------------------------------------------------------------------------
764
+ 2023-08-17 18:42:33,262 EPOCH 28 done: loss 0.2628 - lr 0.000025
765
+ 2023-08-17 18:42:34,976 Evaluating as a multi-label problem: False
766
+ 2023-08-17 18:42:35,018 DEV : loss 0.21640333533287048 - f1-score (micro avg) 0.9803
767
+ 2023-08-17 18:42:35,028 ----------------------------------------------------------------------------------------------------
768
+ 2023-08-17 18:43:19,681 epoch 29 - iter 24/243 - loss 0.24833162 - time (sec): 44.65 - samples/sec: 173.00 - lr: 0.000024
769
+ 2023-08-17 18:44:04,305 epoch 29 - iter 48/243 - loss 0.25554505 - time (sec): 89.28 - samples/sec: 172.79 - lr: 0.000024
770
+ 2023-08-17 18:44:49,078 epoch 29 - iter 72/243 - loss 0.26313723 - time (sec): 134.05 - samples/sec: 172.52 - lr: 0.000024
771
+ 2023-08-17 18:45:33,848 epoch 29 - iter 96/243 - loss 0.26456129 - time (sec): 178.82 - samples/sec: 173.31 - lr: 0.000024
772
+ 2023-08-17 18:46:18,502 epoch 29 - iter 120/243 - loss 0.26539430 - time (sec): 223.47 - samples/sec: 172.87 - lr: 0.000024
773
+ 2023-08-17 18:47:03,393 epoch 29 - iter 144/243 - loss 0.26756174 - time (sec): 268.36 - samples/sec: 173.05 - lr: 0.000024
774
+ 2023-08-17 18:47:48,239 epoch 29 - iter 168/243 - loss 0.26309703 - time (sec): 313.21 - samples/sec: 173.83 - lr: 0.000024
775
+ 2023-08-17 18:48:32,795 epoch 29 - iter 192/243 - loss 0.26532971 - time (sec): 357.77 - samples/sec: 173.08 - lr: 0.000024
776
+ 2023-08-17 18:49:17,536 epoch 29 - iter 216/243 - loss 0.26648227 - time (sec): 402.51 - samples/sec: 173.14 - lr: 0.000024
777
+ 2023-08-17 18:50:02,514 epoch 29 - iter 240/243 - loss 0.26528743 - time (sec): 447.49 - samples/sec: 173.90 - lr: 0.000023
778
+ 2023-08-17 18:50:07,560 ----------------------------------------------------------------------------------------------------
779
+ 2023-08-17 18:50:07,560 EPOCH 29 done: loss 0.2655 - lr 0.000023
780
+ 2023-08-17 18:50:09,281 Evaluating as a multi-label problem: False
781
+ 2023-08-17 18:50:09,323 DEV : loss 0.24248327314853668 - f1-score (micro avg) 0.9796
782
+ 2023-08-17 18:50:09,333 ----------------------------------------------------------------------------------------------------
783
+ 2023-08-17 18:50:54,392 epoch 30 - iter 24/243 - loss 0.26154968 - time (sec): 45.06 - samples/sec: 173.59 - lr: 0.000023
784
+ 2023-08-17 18:51:39,520 epoch 30 - iter 48/243 - loss 0.27126768 - time (sec): 90.19 - samples/sec: 173.61 - lr: 0.000023
785
+ 2023-08-17 18:52:24,452 epoch 30 - iter 72/243 - loss 0.27468039 - time (sec): 135.12 - samples/sec: 171.63 - lr: 0.000023
786
+ 2023-08-17 18:53:09,546 epoch 30 - iter 96/243 - loss 0.27662270 - time (sec): 180.21 - samples/sec: 171.82 - lr: 0.000023
787
+ 2023-08-17 18:53:54,541 epoch 30 - iter 120/243 - loss 0.27403633 - time (sec): 225.21 - samples/sec: 171.64 - lr: 0.000023
788
+ 2023-08-17 18:54:39,592 epoch 30 - iter 144/243 - loss 0.27461637 - time (sec): 270.26 - samples/sec: 171.59 - lr: 0.000023
789
+ 2023-08-17 18:55:24,821 epoch 30 - iter 168/243 - loss 0.26994770 - time (sec): 315.49 - samples/sec: 172.42 - lr: 0.000023
790
+ 2023-08-17 18:56:10,003 epoch 30 - iter 192/243 - loss 0.26952319 - time (sec): 360.67 - samples/sec: 172.96 - lr: 0.000023
791
+ 2023-08-17 18:56:55,008 epoch 30 - iter 216/243 - loss 0.26556592 - time (sec): 405.67 - samples/sec: 173.01 - lr: 0.000022
792
+ 2023-08-17 18:57:39,988 epoch 30 - iter 240/243 - loss 0.26521277 - time (sec): 450.65 - samples/sec: 172.67 - lr: 0.000022
793
+ 2023-08-17 18:57:45,073 ----------------------------------------------------------------------------------------------------
794
+ 2023-08-17 18:57:45,073 EPOCH 30 done: loss 0.2653 - lr 0.000022
795
+ 2023-08-17 18:57:46,797 Evaluating as a multi-label problem: False
796
+ 2023-08-17 18:57:46,839 DEV : loss 0.23393450677394867 - f1-score (micro avg) 0.9776
797
+ 2023-08-17 18:57:46,850 ----------------------------------------------------------------------------------------------------
798
+ 2023-08-17 18:58:32,160 epoch 31 - iter 24/243 - loss 0.24073944 - time (sec): 45.31 - samples/sec: 178.61 - lr: 0.000022
799
+ 2023-08-17 18:59:17,145 epoch 31 - iter 48/243 - loss 0.24507990 - time (sec): 90.29 - samples/sec: 171.40 - lr: 0.000022
800
+ 2023-08-17 19:00:02,391 epoch 31 - iter 72/243 - loss 0.25127541 - time (sec): 135.54 - samples/sec: 172.07 - lr: 0.000022
801
+ 2023-08-17 19:00:47,687 epoch 31 - iter 96/243 - loss 0.25526836 - time (sec): 180.84 - samples/sec: 173.32 - lr: 0.000022
802
+ 2023-08-17 19:01:33,013 epoch 31 - iter 120/243 - loss 0.25884615 - time (sec): 226.16 - samples/sec: 172.93 - lr: 0.000022
803
+ 2023-08-17 19:02:18,184 epoch 31 - iter 144/243 - loss 0.26107421 - time (sec): 271.33 - samples/sec: 172.79 - lr: 0.000022
804
+ 2023-08-17 19:03:03,149 epoch 31 - iter 168/243 - loss 0.25772191 - time (sec): 316.30 - samples/sec: 172.16 - lr: 0.000022
805
+ 2023-08-17 19:03:48,306 epoch 31 - iter 192/243 - loss 0.25843953 - time (sec): 361.46 - samples/sec: 172.20 - lr: 0.000021
806
+ 2023-08-17 19:04:33,463 epoch 31 - iter 216/243 - loss 0.25940033 - time (sec): 406.61 - samples/sec: 172.46 - lr: 0.000021
807
+ 2023-08-17 19:05:18,617 epoch 31 - iter 240/243 - loss 0.25924131 - time (sec): 451.77 - samples/sec: 172.09 - lr: 0.000021
808
+ 2023-08-17 19:05:23,739 ----------------------------------------------------------------------------------------------------
809
+ 2023-08-17 19:05:23,740 EPOCH 31 done: loss 0.2594 - lr 0.000021
810
+ 2023-08-17 19:05:25,464 Evaluating as a multi-label problem: False
811
+ 2023-08-17 19:05:25,506 DEV : loss 0.22774212062358856 - f1-score (micro avg) 0.9788
812
+ 2023-08-17 19:05:25,516 ----------------------------------------------------------------------------------------------------
813
+ 2023-08-17 19:06:10,697 epoch 32 - iter 24/243 - loss 0.25476998 - time (sec): 45.18 - samples/sec: 177.38 - lr: 0.000021
814
+ 2023-08-17 19:06:55,760 epoch 32 - iter 48/243 - loss 0.25629909 - time (sec): 90.24 - samples/sec: 176.79 - lr: 0.000021
815
+ 2023-08-17 19:07:40,898 epoch 32 - iter 72/243 - loss 0.25739595 - time (sec): 135.38 - samples/sec: 175.59 - lr: 0.000021
816
+ 2023-08-17 19:08:26,455 epoch 32 - iter 96/243 - loss 0.26207122 - time (sec): 180.94 - samples/sec: 173.13 - lr: 0.000021
817
+ 2023-08-17 19:09:15,154 epoch 32 - iter 120/243 - loss 0.26238445 - time (sec): 229.64 - samples/sec: 170.68 - lr: 0.000021
818
+ 2023-08-17 19:10:03,555 epoch 32 - iter 144/243 - loss 0.26421827 - time (sec): 278.04 - samples/sec: 168.23 - lr: 0.000021
819
+ 2023-08-17 19:10:52,462 epoch 32 - iter 168/243 - loss 0.26554256 - time (sec): 326.95 - samples/sec: 167.61 - lr: 0.000020
820
+ 2023-08-17 19:11:41,005 epoch 32 - iter 192/243 - loss 0.26682748 - time (sec): 375.49 - samples/sec: 166.51 - lr: 0.000020
821
+ 2023-08-17 19:12:29,585 epoch 32 - iter 216/243 - loss 0.26495455 - time (sec): 424.07 - samples/sec: 166.10 - lr: 0.000020
822
+ 2023-08-17 19:13:17,979 epoch 32 - iter 240/243 - loss 0.26526827 - time (sec): 472.46 - samples/sec: 164.58 - lr: 0.000020
823
+ 2023-08-17 19:13:23,499 ----------------------------------------------------------------------------------------------------
824
+ 2023-08-17 19:13:23,499 EPOCH 32 done: loss 0.2646 - lr 0.000020
825
+ 2023-08-17 19:13:25,222 Evaluating as a multi-label problem: False
826
+ 2023-08-17 19:13:25,264 DEV : loss 0.22920973598957062 - f1-score (micro avg) 0.9793
827
+ 2023-08-17 19:13:25,274 ----------------------------------------------------------------------------------------------------
828
+ 2023-08-17 19:14:10,300 epoch 33 - iter 24/243 - loss 0.26866868 - time (sec): 45.03 - samples/sec: 174.08 - lr: 0.000020
829
+ 2023-08-17 19:14:55,125 epoch 33 - iter 48/243 - loss 0.25914800 - time (sec): 89.85 - samples/sec: 170.62 - lr: 0.000020
830
+ 2023-08-17 19:15:40,213 epoch 33 - iter 72/243 - loss 0.25631313 - time (sec): 134.94 - samples/sec: 170.94 - lr: 0.000020
831
+ 2023-08-17 19:16:25,469 epoch 33 - iter 96/243 - loss 0.25455371 - time (sec): 180.19 - samples/sec: 172.44 - lr: 0.000020
832
+ 2023-08-17 19:17:10,583 epoch 33 - iter 120/243 - loss 0.25585405 - time (sec): 225.31 - samples/sec: 172.36 - lr: 0.000020
833
+ 2023-08-17 19:17:55,604 epoch 33 - iter 144/243 - loss 0.25945055 - time (sec): 270.33 - samples/sec: 172.37 - lr: 0.000019
834
+ 2023-08-17 19:18:40,470 epoch 33 - iter 168/243 - loss 0.25932428 - time (sec): 315.20 - samples/sec: 172.06 - lr: 0.000019
835
+ 2023-08-17 19:19:25,614 epoch 33 - iter 192/243 - loss 0.25851724 - time (sec): 360.34 - samples/sec: 172.32 - lr: 0.000019
836
+ 2023-08-17 19:20:10,561 epoch 33 - iter 216/243 - loss 0.25678080 - time (sec): 405.29 - samples/sec: 172.32 - lr: 0.000019
837
+ 2023-08-17 19:20:55,632 epoch 33 - iter 240/243 - loss 0.25562158 - time (sec): 450.36 - samples/sec: 172.54 - lr: 0.000019
838
+ 2023-08-17 19:21:00,789 ----------------------------------------------------------------------------------------------------
839
+ 2023-08-17 19:21:00,789 EPOCH 33 done: loss 0.2552 - lr 0.000019
840
+ 2023-08-17 19:21:02,513 Evaluating as a multi-label problem: False
841
+ 2023-08-17 19:21:02,555 DEV : loss 0.23627179861068726 - f1-score (micro avg) 0.9791
842
+ 2023-08-17 19:21:02,565 ----------------------------------------------------------------------------------------------------
843
+ 2023-08-17 19:21:47,690 epoch 34 - iter 24/243 - loss 0.27182899 - time (sec): 45.13 - samples/sec: 175.31 - lr: 0.000019
844
+ 2023-08-17 19:22:33,006 epoch 34 - iter 48/243 - loss 0.27027922 - time (sec): 90.44 - samples/sec: 175.01 - lr: 0.000019
845
+ 2023-08-17 19:23:18,237 epoch 34 - iter 72/243 - loss 0.26451951 - time (sec): 135.67 - samples/sec: 174.30 - lr: 0.000019
846
+ 2023-08-17 19:24:03,126 epoch 34 - iter 96/243 - loss 0.26736759 - time (sec): 180.56 - samples/sec: 171.92 - lr: 0.000019
847
+ 2023-08-17 19:24:48,247 epoch 34 - iter 120/243 - loss 0.26439071 - time (sec): 225.68 - samples/sec: 172.25 - lr: 0.000018
848
+ 2023-08-17 19:25:33,434 epoch 34 - iter 144/243 - loss 0.26033732 - time (sec): 270.87 - samples/sec: 172.11 - lr: 0.000018
849
+ 2023-08-17 19:26:18,598 epoch 34 - iter 168/243 - loss 0.25756053 - time (sec): 316.03 - samples/sec: 171.90 - lr: 0.000018
850
+ 2023-08-17 19:27:03,825 epoch 34 - iter 192/243 - loss 0.26053780 - time (sec): 361.26 - samples/sec: 172.26 - lr: 0.000018
851
+ 2023-08-17 19:27:48,940 epoch 34 - iter 216/243 - loss 0.26079037 - time (sec): 406.37 - samples/sec: 172.40 - lr: 0.000018
852
+ 2023-08-17 19:28:34,034 epoch 34 - iter 240/243 - loss 0.25971768 - time (sec): 451.47 - samples/sec: 172.17 - lr: 0.000018
853
+ 2023-08-17 19:28:39,168 ----------------------------------------------------------------------------------------------------
854
+ 2023-08-17 19:28:39,168 EPOCH 34 done: loss 0.2595 - lr 0.000018
855
+ 2023-08-17 19:28:40,895 Evaluating as a multi-label problem: False
856
+ 2023-08-17 19:28:40,937 DEV : loss 0.23955273628234863 - f1-score (micro avg) 0.9796
857
+ 2023-08-17 19:28:40,947 ----------------------------------------------------------------------------------------------------
858
+ 2023-08-17 19:29:26,199 epoch 35 - iter 24/243 - loss 0.26701266 - time (sec): 45.25 - samples/sec: 176.48 - lr: 0.000018
859
+ 2023-08-17 19:30:11,305 epoch 35 - iter 48/243 - loss 0.25211759 - time (sec): 90.36 - samples/sec: 173.69 - lr: 0.000018
860
+ 2023-08-17 19:30:56,569 epoch 35 - iter 72/243 - loss 0.25876122 - time (sec): 135.62 - samples/sec: 173.91 - lr: 0.000018
861
+ 2023-08-17 19:31:41,748 epoch 35 - iter 96/243 - loss 0.25751966 - time (sec): 180.80 - samples/sec: 173.77 - lr: 0.000017
862
+ 2023-08-17 19:32:26,839 epoch 35 - iter 120/243 - loss 0.25782676 - time (sec): 225.89 - samples/sec: 172.54 - lr: 0.000017
863
+ 2023-08-17 19:33:11,980 epoch 35 - iter 144/243 - loss 0.26020302 - time (sec): 271.03 - samples/sec: 172.68 - lr: 0.000017
864
+ 2023-08-17 19:33:57,230 epoch 35 - iter 168/243 - loss 0.26431905 - time (sec): 316.28 - samples/sec: 173.19 - lr: 0.000017
865
+ 2023-08-17 19:34:42,376 epoch 35 - iter 192/243 - loss 0.26060801 - time (sec): 361.43 - samples/sec: 173.02 - lr: 0.000017
866
+ 2023-08-17 19:35:27,393 epoch 35 - iter 216/243 - loss 0.26100924 - time (sec): 406.45 - samples/sec: 172.55 - lr: 0.000017
867
+ 2023-08-17 19:36:12,551 epoch 35 - iter 240/243 - loss 0.26071736 - time (sec): 451.60 - samples/sec: 172.14 - lr: 0.000017
868
+ 2023-08-17 19:36:17,688 ----------------------------------------------------------------------------------------------------
869
+ 2023-08-17 19:36:17,688 EPOCH 35 done: loss 0.2611 - lr 0.000017
870
+ 2023-08-17 19:36:19,905 Evaluating as a multi-label problem: False
871
+ 2023-08-17 19:36:19,946 DEV : loss 0.24450713396072388 - f1-score (micro avg) 0.9791
872
+ 2023-08-17 19:36:19,957 ----------------------------------------------------------------------------------------------------
873
+ 2023-08-17 19:37:04,957 epoch 36 - iter 24/243 - loss 0.27084705 - time (sec): 45.00 - samples/sec: 173.44 - lr: 0.000017
874
+ 2023-08-17 19:37:49,857 epoch 36 - iter 48/243 - loss 0.25947400 - time (sec): 89.90 - samples/sec: 171.29 - lr: 0.000017
875
+ 2023-08-17 19:38:35,063 epoch 36 - iter 72/243 - loss 0.25687195 - time (sec): 135.11 - samples/sec: 173.64 - lr: 0.000016
876
+ 2023-08-17 19:39:19,922 epoch 36 - iter 96/243 - loss 0.25424198 - time (sec): 179.97 - samples/sec: 172.21 - lr: 0.000016
877
+ 2023-08-17 19:40:04,637 epoch 36 - iter 120/243 - loss 0.25557169 - time (sec): 224.68 - samples/sec: 171.71 - lr: 0.000016
878
+ 2023-08-17 19:40:49,808 epoch 36 - iter 144/243 - loss 0.25787383 - time (sec): 269.85 - samples/sec: 172.31 - lr: 0.000016
879
+ 2023-08-17 19:41:34,890 epoch 36 - iter 168/243 - loss 0.25642415 - time (sec): 314.93 - samples/sec: 172.48 - lr: 0.000016
880
+ 2023-08-17 19:42:20,042 epoch 36 - iter 192/243 - loss 0.25543523 - time (sec): 360.08 - samples/sec: 172.92 - lr: 0.000016
881
+ 2023-08-17 19:43:05,071 epoch 36 - iter 216/243 - loss 0.25443060 - time (sec): 405.11 - samples/sec: 172.85 - lr: 0.000016
882
+ 2023-08-17 19:43:50,066 epoch 36 - iter 240/243 - loss 0.25344304 - time (sec): 450.11 - samples/sec: 172.60 - lr: 0.000016
883
+ 2023-08-17 19:43:55,237 ----------------------------------------------------------------------------------------------------
884
+ 2023-08-17 19:43:55,238 EPOCH 36 done: loss 0.2536 - lr 0.000016
885
+ 2023-08-17 19:43:56,962 Evaluating as a multi-label problem: False
886
+ 2023-08-17 19:43:57,004 DEV : loss 0.2530966103076935 - f1-score (micro avg) 0.9788
887
+ 2023-08-17 19:43:57,015 ----------------------------------------------------------------------------------------------------
888
+ 2023-08-17 19:44:42,357 epoch 37 - iter 24/243 - loss 0.27190881 - time (sec): 45.34 - samples/sec: 182.10 - lr: 0.000016
889
+ 2023-08-17 19:45:27,461 epoch 37 - iter 48/243 - loss 0.26681536 - time (sec): 90.45 - samples/sec: 177.05 - lr: 0.000015
890
+ 2023-08-17 19:46:12,707 epoch 37 - iter 72/243 - loss 0.26204165 - time (sec): 135.69 - samples/sec: 175.59 - lr: 0.000015
891
+ 2023-08-17 19:46:57,756 epoch 37 - iter 96/243 - loss 0.25844813 - time (sec): 180.74 - samples/sec: 174.45 - lr: 0.000015
892
+ 2023-08-17 19:47:42,630 epoch 37 - iter 120/243 - loss 0.25889938 - time (sec): 225.62 - samples/sec: 173.20 - lr: 0.000015
893
+ 2023-08-17 19:48:27,811 epoch 37 - iter 144/243 - loss 0.26222809 - time (sec): 270.80 - samples/sec: 173.48 - lr: 0.000015
894
+ 2023-08-17 19:49:12,838 epoch 37 - iter 168/243 - loss 0.26407155 - time (sec): 315.82 - samples/sec: 173.22 - lr: 0.000015
895
+ 2023-08-17 19:49:57,837 epoch 37 - iter 192/243 - loss 0.26361155 - time (sec): 360.82 - samples/sec: 173.19 - lr: 0.000015
896
+ 2023-08-17 19:50:42,929 epoch 37 - iter 216/243 - loss 0.26668156 - time (sec): 405.91 - samples/sec: 173.18 - lr: 0.000015
897
+ 2023-08-17 19:51:27,770 epoch 37 - iter 240/243 - loss 0.26504239 - time (sec): 450.76 - samples/sec: 172.53 - lr: 0.000015
898
+ 2023-08-17 19:51:32,878 ----------------------------------------------------------------------------------------------------
899
+ 2023-08-17 19:51:32,878 EPOCH 37 done: loss 0.2650 - lr 0.000015
900
+ 2023-08-17 19:51:34,655 Evaluating as a multi-label problem: False
901
+ 2023-08-17 19:51:34,697 DEV : loss 0.2624962031841278 - f1-score (micro avg) 0.9781
902
+ 2023-08-17 19:51:34,707 ----------------------------------------------------------------------------------------------------
903
+ 2023-08-17 19:52:19,421 epoch 38 - iter 24/243 - loss 0.26162759 - time (sec): 44.71 - samples/sec: 172.54 - lr: 0.000014
904
+ 2023-08-17 19:53:04,244 epoch 38 - iter 48/243 - loss 0.26085357 - time (sec): 89.54 - samples/sec: 174.41 - lr: 0.000014
905
+ 2023-08-17 19:53:49,106 epoch 38 - iter 72/243 - loss 0.25308808 - time (sec): 134.40 - samples/sec: 175.19 - lr: 0.000014
906
+ 2023-08-17 19:54:33,752 epoch 38 - iter 96/243 - loss 0.25632516 - time (sec): 179.05 - samples/sec: 174.48 - lr: 0.000014
907
+ 2023-08-17 19:55:18,661 epoch 38 - iter 120/243 - loss 0.25358337 - time (sec): 223.95 - samples/sec: 175.31 - lr: 0.000014
908
+ 2023-08-17 19:56:03,661 epoch 38 - iter 144/243 - loss 0.25557088 - time (sec): 268.95 - samples/sec: 174.97 - lr: 0.000014
909
+ 2023-08-17 19:56:48,405 epoch 38 - iter 168/243 - loss 0.25407854 - time (sec): 313.70 - samples/sec: 175.34 - lr: 0.000014
910
+ 2023-08-17 19:57:32,972 epoch 38 - iter 192/243 - loss 0.25597339 - time (sec): 358.26 - samples/sec: 174.50 - lr: 0.000014
911
+ 2023-08-17 19:58:17,603 epoch 38 - iter 216/243 - loss 0.25532730 - time (sec): 402.90 - samples/sec: 174.17 - lr: 0.000014
912
+ 2023-08-17 19:59:03,800 epoch 38 - iter 240/243 - loss 0.25415245 - time (sec): 449.09 - samples/sec: 172.85 - lr: 0.000013
913
+ 2023-08-17 19:59:09,429 ----------------------------------------------------------------------------------------------------
914
+ 2023-08-17 19:59:09,429 EPOCH 38 done: loss 0.2542 - lr 0.000013
915
+ 2023-08-17 19:59:11,152 Evaluating as a multi-label problem: False
916
+ 2023-08-17 19:59:11,193 DEV : loss 0.24244999885559082 - f1-score (micro avg) 0.9788
917
+ 2023-08-17 19:59:11,204 ----------------------------------------------------------------------------------------------------
918
+ 2023-08-17 19:59:56,237 epoch 39 - iter 24/243 - loss 0.25336484 - time (sec): 45.03 - samples/sec: 174.16 - lr: 0.000013
919
+ 2023-08-17 20:00:41,351 epoch 39 - iter 48/243 - loss 0.25897743 - time (sec): 90.15 - samples/sec: 174.70 - lr: 0.000013
920
+ 2023-08-17 20:01:26,486 epoch 39 - iter 72/243 - loss 0.25769549 - time (sec): 135.28 - samples/sec: 172.24 - lr: 0.000013
921
+ 2023-08-17 20:02:11,729 epoch 39 - iter 96/243 - loss 0.25751150 - time (sec): 180.53 - samples/sec: 172.80 - lr: 0.000013
922
+ 2023-08-17 20:02:56,669 epoch 39 - iter 120/243 - loss 0.25315782 - time (sec): 225.47 - samples/sec: 172.18 - lr: 0.000013
923
+ 2023-08-17 20:03:41,567 epoch 39 - iter 144/243 - loss 0.25233489 - time (sec): 270.36 - samples/sec: 171.73 - lr: 0.000013
924
+ 2023-08-17 20:04:26,496 epoch 39 - iter 168/243 - loss 0.25114668 - time (sec): 315.29 - samples/sec: 171.56 - lr: 0.000013
925
+ 2023-08-17 20:05:11,629 epoch 39 - iter 192/243 - loss 0.25185953 - time (sec): 360.43 - samples/sec: 171.50 - lr: 0.000013
926
+ 2023-08-17 20:05:56,837 epoch 39 - iter 216/243 - loss 0.25746349 - time (sec): 405.63 - samples/sec: 172.32 - lr: 0.000012
927
+ 2023-08-17 20:06:41,874 epoch 39 - iter 240/243 - loss 0.25680252 - time (sec): 450.67 - samples/sec: 172.37 - lr: 0.000012
928
+ 2023-08-17 20:06:47,033 ----------------------------------------------------------------------------------------------------
929
+ 2023-08-17 20:06:47,033 EPOCH 39 done: loss 0.2579 - lr 0.000012
930
+ 2023-08-17 20:06:48,760 Evaluating as a multi-label problem: False
931
+ 2023-08-17 20:06:48,802 DEV : loss 0.24615894258022308 - f1-score (micro avg) 0.9798
932
+ 2023-08-17 20:06:48,812 ----------------------------------------------------------------------------------------------------
933
+ 2023-08-17 20:07:33,926 epoch 40 - iter 24/243 - loss 0.24837758 - time (sec): 45.11 - samples/sec: 172.25 - lr: 0.000012
934
+ 2023-08-17 20:08:19,310 epoch 40 - iter 48/243 - loss 0.24725040 - time (sec): 90.50 - samples/sec: 172.92 - lr: 0.000012
935
+ 2023-08-17 20:09:04,552 epoch 40 - iter 72/243 - loss 0.25023824 - time (sec): 135.74 - samples/sec: 173.72 - lr: 0.000012
936
+ 2023-08-17 20:09:49,591 epoch 40 - iter 96/243 - loss 0.24239002 - time (sec): 180.78 - samples/sec: 173.19 - lr: 0.000012
937
+ 2023-08-17 20:10:34,542 epoch 40 - iter 120/243 - loss 0.24524267 - time (sec): 225.73 - samples/sec: 172.25 - lr: 0.000012
938
+ 2023-08-17 20:11:19,609 epoch 40 - iter 144/243 - loss 0.24784591 - time (sec): 270.80 - samples/sec: 172.43 - lr: 0.000012
939
+ 2023-08-17 20:12:08,493 epoch 40 - iter 168/243 - loss 0.24872740 - time (sec): 319.68 - samples/sec: 169.61 - lr: 0.000012
940
+ 2023-08-17 20:12:54,900 epoch 40 - iter 192/243 - loss 0.25012412 - time (sec): 366.09 - samples/sec: 169.36 - lr: 0.000011
941
+ 2023-08-17 20:13:42,979 epoch 40 - iter 216/243 - loss 0.25345259 - time (sec): 414.17 - samples/sec: 168.78 - lr: 0.000011
942
+ 2023-08-17 20:14:31,756 epoch 40 - iter 240/243 - loss 0.25383699 - time (sec): 462.94 - samples/sec: 167.76 - lr: 0.000011
943
+ 2023-08-17 20:14:37,531 ----------------------------------------------------------------------------------------------------
944
+ 2023-08-17 20:14:37,531 EPOCH 40 done: loss 0.2540 - lr 0.000011
945
+ 2023-08-17 20:14:39,381 Evaluating as a multi-label problem: False
946
+ 2023-08-17 20:14:39,423 DEV : loss 0.2575598359107971 - f1-score (micro avg) 0.9791
947
+ 2023-08-17 20:14:39,433 ----------------------------------------------------------------------------------------------------
948
+ 2023-08-17 20:15:24,676 epoch 41 - iter 24/243 - loss 0.24306327 - time (sec): 45.24 - samples/sec: 171.89 - lr: 0.000011
949
+ 2023-08-17 20:16:09,952 epoch 41 - iter 48/243 - loss 0.24156726 - time (sec): 90.52 - samples/sec: 171.86 - lr: 0.000011
950
+ 2023-08-17 20:16:55,381 epoch 41 - iter 72/243 - loss 0.24869032 - time (sec): 135.95 - samples/sec: 173.38 - lr: 0.000011
951
+ 2023-08-17 20:17:40,497 epoch 41 - iter 96/243 - loss 0.25072177 - time (sec): 181.06 - samples/sec: 171.65 - lr: 0.000011
952
+ 2023-08-17 20:18:25,687 epoch 41 - iter 120/243 - loss 0.25396376 - time (sec): 226.25 - samples/sec: 170.55 - lr: 0.000011
953
+ 2023-08-17 20:19:11,133 epoch 41 - iter 144/243 - loss 0.25095812 - time (sec): 271.70 - samples/sec: 170.92 - lr: 0.000011
954
+ 2023-08-17 20:19:56,664 epoch 41 - iter 168/243 - loss 0.24810464 - time (sec): 317.23 - samples/sec: 171.61 - lr: 0.000010
955
+ 2023-08-17 20:20:41,990 epoch 41 - iter 192/243 - loss 0.24879453 - time (sec): 362.56 - samples/sec: 171.41 - lr: 0.000010
956
+ 2023-08-17 20:21:27,351 epoch 41 - iter 216/243 - loss 0.25177431 - time (sec): 407.92 - samples/sec: 171.85 - lr: 0.000010
957
+ 2023-08-17 20:22:12,639 epoch 41 - iter 240/243 - loss 0.25152758 - time (sec): 453.21 - samples/sec: 171.53 - lr: 0.000010
958
+ 2023-08-17 20:22:17,823 ----------------------------------------------------------------------------------------------------
959
+ 2023-08-17 20:22:17,823 EPOCH 41 done: loss 0.2509 - lr 0.000010
960
+ 2023-08-17 20:22:19,547 Evaluating as a multi-label problem: False
961
+ 2023-08-17 20:22:19,589 DEV : loss 0.25127604603767395 - f1-score (micro avg) 0.9786
962
+ 2023-08-17 20:22:19,600 ----------------------------------------------------------------------------------------------------
963
+ 2023-08-17 20:23:04,548 epoch 42 - iter 24/243 - loss 0.25413425 - time (sec): 44.95 - samples/sec: 163.63 - lr: 0.000010
964
+ 2023-08-17 20:23:49,857 epoch 42 - iter 48/243 - loss 0.25771203 - time (sec): 90.26 - samples/sec: 170.10 - lr: 0.000010
965
+ 2023-08-17 20:24:35,295 epoch 42 - iter 72/243 - loss 0.25402986 - time (sec): 135.70 - samples/sec: 170.29 - lr: 0.000010
966
+ 2023-08-17 20:25:20,626 epoch 42 - iter 96/243 - loss 0.25689370 - time (sec): 181.03 - samples/sec: 171.93 - lr: 0.000010
967
+ 2023-08-17 20:26:05,590 epoch 42 - iter 120/243 - loss 0.25635789 - time (sec): 225.99 - samples/sec: 170.42 - lr: 0.000010
968
+ 2023-08-17 20:26:50,916 epoch 42 - iter 144/243 - loss 0.25641142 - time (sec): 271.32 - samples/sec: 171.01 - lr: 0.000009
969
+ 2023-08-17 20:27:36,141 epoch 42 - iter 168/243 - loss 0.25676110 - time (sec): 316.54 - samples/sec: 171.50 - lr: 0.000009
970
+ 2023-08-17 20:28:21,268 epoch 42 - iter 192/243 - loss 0.25789268 - time (sec): 361.67 - samples/sec: 171.54 - lr: 0.000009
971
+ 2023-08-17 20:29:06,403 epoch 42 - iter 216/243 - loss 0.25889165 - time (sec): 406.80 - samples/sec: 172.07 - lr: 0.000009
972
+ 2023-08-17 20:29:51,477 epoch 42 - iter 240/243 - loss 0.25885055 - time (sec): 451.88 - samples/sec: 172.14 - lr: 0.000009
973
+ 2023-08-17 20:29:56,582 ----------------------------------------------------------------------------------------------------
974
+ 2023-08-17 20:29:56,582 EPOCH 42 done: loss 0.2584 - lr 0.000009
975
+ 2023-08-17 20:29:58,307 Evaluating as a multi-label problem: False
976
+ 2023-08-17 20:29:58,349 DEV : loss 0.2509002983570099 - f1-score (micro avg) 0.9776
977
+ 2023-08-17 20:29:58,359 ----------------------------------------------------------------------------------------------------
978
+ 2023-08-17 20:30:43,484 epoch 43 - iter 24/243 - loss 0.25656669 - time (sec): 45.13 - samples/sec: 174.29 - lr: 0.000009
979
+ 2023-08-17 20:31:28,785 epoch 43 - iter 48/243 - loss 0.25713909 - time (sec): 90.43 - samples/sec: 177.52 - lr: 0.000009
980
+ 2023-08-17 20:32:13,666 epoch 43 - iter 72/243 - loss 0.25209780 - time (sec): 135.31 - samples/sec: 174.82 - lr: 0.000009
981
+ 2023-08-17 20:32:58,675 epoch 43 - iter 96/243 - loss 0.24509857 - time (sec): 180.32 - samples/sec: 174.12 - lr: 0.000009
982
+ 2023-08-17 20:33:43,692 epoch 43 - iter 120/243 - loss 0.25000579 - time (sec): 225.33 - samples/sec: 173.22 - lr: 0.000008
983
+ 2023-08-17 20:34:28,765 epoch 43 - iter 144/243 - loss 0.25295949 - time (sec): 270.41 - samples/sec: 173.31 - lr: 0.000008
984
+ 2023-08-17 20:35:13,971 epoch 43 - iter 168/243 - loss 0.25493036 - time (sec): 315.61 - samples/sec: 173.93 - lr: 0.000008
985
+ 2023-08-17 20:35:58,754 epoch 43 - iter 192/243 - loss 0.25313033 - time (sec): 360.39 - samples/sec: 172.93 - lr: 0.000008
986
+ 2023-08-17 20:36:43,598 epoch 43 - iter 216/243 - loss 0.25255837 - time (sec): 405.24 - samples/sec: 172.41 - lr: 0.000008
987
+ 2023-08-17 20:37:28,672 epoch 43 - iter 240/243 - loss 0.25326105 - time (sec): 450.31 - samples/sec: 172.71 - lr: 0.000008
988
+ 2023-08-17 20:37:33,785 ----------------------------------------------------------------------------------------------------
989
+ 2023-08-17 20:37:33,786 EPOCH 43 done: loss 0.2536 - lr 0.000008
990
+ 2023-08-17 20:37:35,959 Evaluating as a multi-label problem: False
991
+ 2023-08-17 20:37:36,000 DEV : loss 0.25337928533554077 - f1-score (micro avg) 0.9784
992
+ 2023-08-17 20:37:36,011 ----------------------------------------------------------------------------------------------------
993
+ 2023-08-17 20:38:20,984 epoch 44 - iter 24/243 - loss 0.22752064 - time (sec): 44.97 - samples/sec: 167.41 - lr: 0.000008
994
+ 2023-08-17 20:39:06,170 epoch 44 - iter 48/243 - loss 0.23951614 - time (sec): 90.16 - samples/sec: 168.62 - lr: 0.000008
995
+ 2023-08-17 20:39:51,244 epoch 44 - iter 72/243 - loss 0.23986022 - time (sec): 135.23 - samples/sec: 169.56 - lr: 0.000008
996
+ 2023-08-17 20:40:36,601 epoch 44 - iter 96/243 - loss 0.24528781 - time (sec): 180.59 - samples/sec: 170.83 - lr: 0.000007
997
+ 2023-08-17 20:41:21,816 epoch 44 - iter 120/243 - loss 0.24572088 - time (sec): 225.80 - samples/sec: 170.78 - lr: 0.000007
998
+ 2023-08-17 20:42:06,954 epoch 44 - iter 144/243 - loss 0.24464183 - time (sec): 270.94 - samples/sec: 170.55 - lr: 0.000007
999
+ 2023-08-17 20:42:52,400 epoch 44 - iter 168/243 - loss 0.24523592 - time (sec): 316.39 - samples/sec: 171.33 - lr: 0.000007
1000
+ 2023-08-17 20:43:37,405 epoch 44 - iter 192/243 - loss 0.24519757 - time (sec): 361.39 - samples/sec: 171.38 - lr: 0.000007
1001
+ 2023-08-17 20:44:22,621 epoch 44 - iter 216/243 - loss 0.24456227 - time (sec): 406.61 - samples/sec: 171.83 - lr: 0.000007
1002
+ 2023-08-17 20:45:07,846 epoch 44 - iter 240/243 - loss 0.24582873 - time (sec): 451.84 - samples/sec: 171.90 - lr: 0.000007
1003
+ 2023-08-17 20:45:13,019 ----------------------------------------------------------------------------------------------------
1004
+ 2023-08-17 20:45:13,019 EPOCH 44 done: loss 0.2462 - lr 0.000007
1005
+ 2023-08-17 20:45:14,761 Evaluating as a multi-label problem: False
1006
+ 2023-08-17 20:45:14,803 DEV : loss 0.25915977358818054 - f1-score (micro avg) 0.9784
1007
+ 2023-08-17 20:45:14,813 ----------------------------------------------------------------------------------------------------
1008
+ 2023-08-17 20:45:59,917 epoch 45 - iter 24/243 - loss 0.26201019 - time (sec): 45.10 - samples/sec: 175.04 - lr: 0.000007
1009
+ 2023-08-17 20:46:44,992 epoch 45 - iter 48/243 - loss 0.24779270 - time (sec): 90.18 - samples/sec: 172.56 - lr: 0.000007
1010
+ 2023-08-17 20:47:30,101 epoch 45 - iter 72/243 - loss 0.25012887 - time (sec): 135.29 - samples/sec: 172.75 - lr: 0.000006
1011
+ 2023-08-17 20:48:15,149 epoch 45 - iter 96/243 - loss 0.25289868 - time (sec): 180.34 - samples/sec: 172.74 - lr: 0.000006
1012
+ 2023-08-17 20:49:00,142 epoch 45 - iter 120/243 - loss 0.25326284 - time (sec): 225.33 - samples/sec: 172.27 - lr: 0.000006
1013
+ 2023-08-17 20:49:45,326 epoch 45 - iter 144/243 - loss 0.25373868 - time (sec): 270.51 - samples/sec: 172.88 - lr: 0.000006
1014
+ 2023-08-17 20:50:30,290 epoch 45 - iter 168/243 - loss 0.25215421 - time (sec): 315.48 - samples/sec: 172.65 - lr: 0.000006
1015
+ 2023-08-17 20:51:15,228 epoch 45 - iter 192/243 - loss 0.25175489 - time (sec): 360.42 - samples/sec: 172.14 - lr: 0.000006
1016
+ 2023-08-17 20:52:00,245 epoch 45 - iter 216/243 - loss 0.24952171 - time (sec): 405.43 - samples/sec: 172.18 - lr: 0.000006
1017
+ 2023-08-17 20:52:45,370 epoch 45 - iter 240/243 - loss 0.25004168 - time (sec): 450.56 - samples/sec: 172.56 - lr: 0.000006
1018
+ 2023-08-17 20:52:50,494 ----------------------------------------------------------------------------------------------------
1019
+ 2023-08-17 20:52:50,494 EPOCH 45 done: loss 0.2503 - lr 0.000006
1020
+ 2023-08-17 20:52:52,332 Evaluating as a multi-label problem: False
1021
+ 2023-08-17 20:52:52,388 DEV : loss 0.2550533413887024 - f1-score (micro avg) 0.9788
1022
+ 2023-08-17 20:52:52,402 ----------------------------------------------------------------------------------------------------
1023
+ 2023-08-17 20:53:37,710 epoch 46 - iter 24/243 - loss 0.24479678 - time (sec): 45.31 - samples/sec: 174.98 - lr: 0.000006
1024
+ 2023-08-17 20:54:22,818 epoch 46 - iter 48/243 - loss 0.24138586 - time (sec): 90.42 - samples/sec: 173.24 - lr: 0.000005
1025
+ 2023-08-17 20:55:07,956 epoch 46 - iter 72/243 - loss 0.24404064 - time (sec): 135.55 - samples/sec: 172.85 - lr: 0.000005
1026
+ 2023-08-17 20:55:53,054 epoch 46 - iter 96/243 - loss 0.24604064 - time (sec): 180.65 - samples/sec: 171.68 - lr: 0.000005
1027
+ 2023-08-17 20:56:38,247 epoch 46 - iter 120/243 - loss 0.24783294 - time (sec): 225.84 - samples/sec: 172.24 - lr: 0.000005
1028
+ 2023-08-17 20:57:23,316 epoch 46 - iter 144/243 - loss 0.24973562 - time (sec): 270.91 - samples/sec: 172.22 - lr: 0.000005
1029
+ 2023-08-17 20:58:08,456 epoch 46 - iter 168/243 - loss 0.24967162 - time (sec): 316.05 - samples/sec: 171.90 - lr: 0.000005
1030
+ 2023-08-17 20:58:53,610 epoch 46 - iter 192/243 - loss 0.25131667 - time (sec): 361.21 - samples/sec: 172.04 - lr: 0.000005
1031
+ 2023-08-17 20:59:38,918 epoch 46 - iter 216/243 - loss 0.25004815 - time (sec): 406.52 - samples/sec: 172.17 - lr: 0.000005
1032
+ 2023-08-17 21:00:24,136 epoch 46 - iter 240/243 - loss 0.24797003 - time (sec): 451.73 - samples/sec: 172.25 - lr: 0.000005
1033
+ 2023-08-17 21:00:29,231 ----------------------------------------------------------------------------------------------------
1034
+ 2023-08-17 21:00:29,231 EPOCH 46 done: loss 0.2475 - lr 0.000005
1035
+ 2023-08-17 21:00:30,955 Evaluating as a multi-label problem: False
1036
+ 2023-08-17 21:00:30,997 DEV : loss 0.2502936124801636 - f1-score (micro avg) 0.9796
1037
+ 2023-08-17 21:00:31,007 ----------------------------------------------------------------------------------------------------
1038
+ 2023-08-17 21:01:16,086 epoch 47 - iter 24/243 - loss 0.24652539 - time (sec): 45.08 - samples/sec: 175.07 - lr: 0.000004
1039
+ 2023-08-17 21:02:01,372 epoch 47 - iter 48/243 - loss 0.25432254 - time (sec): 90.36 - samples/sec: 176.29 - lr: 0.000004
1040
+ 2023-08-17 21:02:46,656 epoch 47 - iter 72/243 - loss 0.24907829 - time (sec): 135.65 - samples/sec: 176.37 - lr: 0.000004
1041
+ 2023-08-17 21:03:31,414 epoch 47 - iter 96/243 - loss 0.25143514 - time (sec): 180.41 - samples/sec: 173.23 - lr: 0.000004
1042
+ 2023-08-17 21:04:16,401 epoch 47 - iter 120/243 - loss 0.25195942 - time (sec): 225.39 - samples/sec: 172.69 - lr: 0.000004
1043
+ 2023-08-17 21:05:01,676 epoch 47 - iter 144/243 - loss 0.25140692 - time (sec): 270.67 - samples/sec: 172.96 - lr: 0.000004
1044
+ 2023-08-17 21:05:46,804 epoch 47 - iter 168/243 - loss 0.25098133 - time (sec): 315.80 - samples/sec: 173.10 - lr: 0.000004
1045
+ 2023-08-17 21:06:31,774 epoch 47 - iter 192/243 - loss 0.24903435 - time (sec): 360.77 - samples/sec: 172.23 - lr: 0.000004
1046
+ 2023-08-17 21:07:16,864 epoch 47 - iter 216/243 - loss 0.24707558 - time (sec): 405.86 - samples/sec: 172.19 - lr: 0.000004
1047
+ 2023-08-17 21:08:02,008 epoch 47 - iter 240/243 - loss 0.24996260 - time (sec): 451.00 - samples/sec: 172.33 - lr: 0.000003
1048
+ 2023-08-17 21:08:07,158 ----------------------------------------------------------------------------------------------------
1049
+ 2023-08-17 21:08:07,159 EPOCH 47 done: loss 0.2500 - lr 0.000003
1050
+ 2023-08-17 21:08:08,882 Evaluating as a multi-label problem: False
1051
+ 2023-08-17 21:08:08,924 DEV : loss 0.25260353088378906 - f1-score (micro avg) 0.9788
1052
+ 2023-08-17 21:08:08,934 ----------------------------------------------------------------------------------------------------
1053
+ 2023-08-17 21:08:53,985 epoch 48 - iter 24/243 - loss 0.26092477 - time (sec): 45.05 - samples/sec: 171.65 - lr: 0.000003
1054
+ 2023-08-17 21:09:38,894 epoch 48 - iter 48/243 - loss 0.26380496 - time (sec): 89.96 - samples/sec: 170.60 - lr: 0.000003
1055
+ 2023-08-17 21:10:24,142 epoch 48 - iter 72/243 - loss 0.26586966 - time (sec): 135.21 - samples/sec: 173.82 - lr: 0.000003
1056
+ 2023-08-17 21:11:09,278 epoch 48 - iter 96/243 - loss 0.26118560 - time (sec): 180.34 - samples/sec: 173.40 - lr: 0.000003
1057
+ 2023-08-17 21:11:54,556 epoch 48 - iter 120/243 - loss 0.25715945 - time (sec): 225.62 - samples/sec: 174.07 - lr: 0.000003
1058
+ 2023-08-17 21:12:39,669 epoch 48 - iter 144/243 - loss 0.25935501 - time (sec): 270.73 - samples/sec: 174.42 - lr: 0.000003
1059
+ 2023-08-17 21:13:24,404 epoch 48 - iter 168/243 - loss 0.25807126 - time (sec): 315.47 - samples/sec: 173.68 - lr: 0.000003
1060
+ 2023-08-17 21:14:09,365 epoch 48 - iter 192/243 - loss 0.25819322 - time (sec): 360.43 - samples/sec: 173.11 - lr: 0.000003
1061
+ 2023-08-17 21:14:54,410 epoch 48 - iter 216/243 - loss 0.25780077 - time (sec): 405.48 - samples/sec: 173.28 - lr: 0.000002
1062
+ 2023-08-17 21:15:39,253 epoch 48 - iter 240/243 - loss 0.25669533 - time (sec): 450.32 - samples/sec: 172.63 - lr: 0.000002
1063
+ 2023-08-17 21:15:44,404 ----------------------------------------------------------------------------------------------------
1064
+ 2023-08-17 21:15:44,404 EPOCH 48 done: loss 0.2562 - lr 0.000002
1065
+ 2023-08-17 21:15:46,131 Evaluating as a multi-label problem: False
1066
+ 2023-08-17 21:15:46,173 DEV : loss 0.2517630159854889 - f1-score (micro avg) 0.9793
1067
+ 2023-08-17 21:15:46,183 ----------------------------------------------------------------------------------------------------
1068
+ 2023-08-17 21:16:31,546 epoch 49 - iter 24/243 - loss 0.27952006 - time (sec): 45.36 - samples/sec: 168.82 - lr: 0.000002
1069
+ 2023-08-17 21:17:16,632 epoch 49 - iter 48/243 - loss 0.26483505 - time (sec): 90.45 - samples/sec: 170.11 - lr: 0.000002
1070
+ 2023-08-17 21:18:01,622 epoch 49 - iter 72/243 - loss 0.25971199 - time (sec): 135.44 - samples/sec: 169.80 - lr: 0.000002
1071
+ 2023-08-17 21:18:47,114 epoch 49 - iter 96/243 - loss 0.25971123 - time (sec): 180.93 - samples/sec: 170.94 - lr: 0.000002
1072
+ 2023-08-17 21:19:32,644 epoch 49 - iter 120/243 - loss 0.25121870 - time (sec): 226.46 - samples/sec: 171.90 - lr: 0.000002
1073
+ 2023-08-17 21:20:17,801 epoch 49 - iter 144/243 - loss 0.24985456 - time (sec): 271.62 - samples/sec: 171.69 - lr: 0.000002
1074
+ 2023-08-17 21:21:02,857 epoch 49 - iter 168/243 - loss 0.25019492 - time (sec): 316.67 - samples/sec: 171.42 - lr: 0.000002
1075
+ 2023-08-17 21:21:48,180 epoch 49 - iter 192/243 - loss 0.24964407 - time (sec): 362.00 - samples/sec: 171.73 - lr: 0.000001
1076
+ 2023-08-17 21:22:33,397 epoch 49 - iter 216/243 - loss 0.24966262 - time (sec): 407.21 - samples/sec: 171.54 - lr: 0.000001
1077
+ 2023-08-17 21:23:18,568 epoch 49 - iter 240/243 - loss 0.24839303 - time (sec): 452.39 - samples/sec: 171.45 - lr: 0.000001
1078
+ 2023-08-17 21:23:23,804 ----------------------------------------------------------------------------------------------------
1079
+ 2023-08-17 21:23:23,804 EPOCH 49 done: loss 0.2480 - lr 0.000001
1080
+ 2023-08-17 21:23:25,551 Evaluating as a multi-label problem: False
1081
+ 2023-08-17 21:23:25,593 DEV : loss 0.25181668996810913 - f1-score (micro avg) 0.9786
1082
+ 2023-08-17 21:23:25,603 ----------------------------------------------------------------------------------------------------
1083
+ 2023-08-17 21:24:10,982 epoch 50 - iter 24/243 - loss 0.26114983 - time (sec): 45.38 - samples/sec: 182.97 - lr: 0.000001
1084
+ 2023-08-17 21:24:55,750 epoch 50 - iter 48/243 - loss 0.24629344 - time (sec): 90.15 - samples/sec: 175.47 - lr: 0.000001
1085
+ 2023-08-17 21:25:40,933 epoch 50 - iter 72/243 - loss 0.24771674 - time (sec): 135.33 - samples/sec: 174.06 - lr: 0.000001
1086
+ 2023-08-17 21:26:26,046 epoch 50 - iter 96/243 - loss 0.24705085 - time (sec): 180.44 - samples/sec: 174.69 - lr: 0.000001
1087
+ 2023-08-17 21:27:11,087 epoch 50 - iter 120/243 - loss 0.24435267 - time (sec): 225.48 - samples/sec: 173.57 - lr: 0.000001
1088
+ 2023-08-17 21:27:56,117 epoch 50 - iter 144/243 - loss 0.24537610 - time (sec): 270.51 - samples/sec: 173.20 - lr: 0.000001
1089
+ 2023-08-17 21:28:41,269 epoch 50 - iter 168/243 - loss 0.24725247 - time (sec): 315.67 - samples/sec: 173.29 - lr: 0.000000
1090
+ 2023-08-17 21:29:26,179 epoch 50 - iter 192/243 - loss 0.24773009 - time (sec): 360.58 - samples/sec: 172.82 - lr: 0.000000
1091
+ 2023-08-17 21:30:11,065 epoch 50 - iter 216/243 - loss 0.24906212 - time (sec): 405.46 - samples/sec: 172.43 - lr: 0.000000
1092
+ 2023-08-17 21:30:56,249 epoch 50 - iter 240/243 - loss 0.24977353 - time (sec): 450.65 - samples/sec: 172.59 - lr: 0.000000
1093
+ 2023-08-17 21:31:01,340 ----------------------------------------------------------------------------------------------------
1094
+ 2023-08-17 21:31:01,340 EPOCH 50 done: loss 0.2503 - lr 0.000000
1095
+ 2023-08-17 21:31:03,066 Evaluating as a multi-label problem: False
1096
+ 2023-08-17 21:31:03,108 DEV : loss 0.2513697147369385 - f1-score (micro avg) 0.9784
1097
+ 2023-08-17 21:31:05,400 Test data not provided setting final score to 0