Seth Kulick commited on
Commit
63a0ab7
·
1 Parent(s): dd15378

update test

Browse files
Files changed (3) hide show
  1. loss.tsv +2 -50
  2. pytorch_model.bin +2 -2
  3. training.log +53 -784
loss.tsv CHANGED
@@ -1,51 +1,3 @@
1
  EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
2
- 1 14:23:53 0.0000 4.21414778311098 1.6606154441833496 0.7017 0.7017 0.7017 0.7017
3
- 2 14:31:27 0.0000 1.236230191219479 0.4555579721927643 0.9132 0.9132 0.9132 0.9132
4
- 3 14:39:02 0.0000 0.649921288465607 0.247285857796669 0.9518 0.9518 0.9518 0.9518
5
- 4 14:47:06 0.0000 0.49171242865394643 0.18006576597690582 0.9648 0.9648 0.9648 0.9648
6
- 5 14:55:21 0.0000 0.42244408257393484 0.15854212641716003 0.9715 0.9715 0.9715 0.9715
7
- 6 15:02:59 0.0000 0.38888915953073067 0.1478930115699768 0.9729 0.9729 0.9729 0.9729
8
- 7 15:10:47 0.0000 0.36700320148296955 0.14240729808807373 0.9717 0.9717 0.9717 0.9717
9
- 8 15:18:56 0.0000 0.34955964649357035 0.13401205837726593 0.9752 0.9752 0.9752 0.9752
10
- 9 15:27:27 0.0000 0.3329301695936556 0.14190562069416046 0.9764 0.9764 0.9764 0.9764
11
- 10 15:35:06 0.0000 0.3320703285608999 0.1481310874223709 0.9734 0.9734 0.9734 0.9734
12
- 11 15:43:05 0.0000 0.3201111280509732 0.16022486984729767 0.9744 0.9744 0.9744 0.9744
13
- 12 15:50:40 0.0000 0.3146267273126732 0.17038877308368683 0.9764 0.9764 0.9764 0.9764
14
- 13 15:58:16 0.0000 0.3056304733563553 0.16180633008480072 0.9766 0.9766 0.9766 0.9766
15
- 14 16:06:46 0.0000 0.2946385615949501 0.1961415857076645 0.9729 0.9729 0.9729 0.9729
16
- 15 16:15:05 0.0000 0.29707305155274893 0.21415923535823822 0.9737 0.9737 0.9737 0.9737
17
- 16 16:23:13 0.0000 0.28580348285942486 0.17488490045070648 0.9764 0.9764 0.9764 0.9764
18
- 17 16:30:47 0.0000 0.28452900538217474 0.1961992233991623 0.9764 0.9764 0.9764 0.9764
19
- 18 16:38:27 0.0000 0.286532418628909 0.18113288283348083 0.9781 0.9781 0.9781 0.9781
20
- 19 16:46:56 0.0000 0.2808003542003455 0.2043328434228897 0.9793 0.9793 0.9793 0.9793
21
- 20 16:55:58 0.0000 0.28836057751744903 0.17976026237010956 0.9798 0.9798 0.9798 0.9798
22
- 21 17:03:34 0.0000 0.276102740426533 0.20532046258449554 0.9808 0.9808 0.9808 0.9808
23
- 22 17:11:12 0.0000 0.27382087732996463 0.20975473523139954 0.9771 0.9771 0.9771 0.9771
24
- 23 17:18:46 0.0000 0.27393156456791734 0.21456189453601837 0.9796 0.9796 0.9796 0.9796
25
- 24 17:26:20 0.0000 0.2696315985537938 0.21408958733081818 0.9788 0.9788 0.9788 0.9788
26
- 25 17:34:18 0.0000 0.2604978712176271 0.20778048038482666 0.9801 0.9801 0.9801 0.9801
27
- 26 17:42:35 0.0000 0.2631420220409018 0.22401468455791473 0.9786 0.9786 0.9786 0.9786
28
- 27 17:50:17 0.0000 0.2655839982462426 0.2324327975511551 0.9771 0.9771 0.9771 0.9771
29
- 28 17:57:50 0.0000 0.2628011544988305 0.21640333533287048 0.9803 0.9803 0.9803 0.9803
30
- 29 18:05:23 0.0000 0.26550006090015277 0.24248327314853668 0.9796 0.9796 0.9796 0.9796
31
- 30 18:14:03 0.0000 0.2652689226998264 0.23393450677394867 0.9776 0.9776 0.9776 0.9776
32
- 31 18:21:37 0.0000 0.25939785844109664 0.22774212062358856 0.9788 0.9788 0.9788 0.9788
33
- 32 18:29:23 0.0000 0.2645543534505578 0.22920973598957062 0.9793 0.9793 0.9793 0.9793
34
- 33 18:37:33 0.0000 0.255237703400159 0.23627179861068726 0.9791 0.9791 0.9791 0.9791
35
- 34 18:45:06 0.0000 0.2595120500430324 0.23955273628234863 0.9796 0.9796 0.9796 0.9796
36
- 35 18:52:39 0.0000 0.2611494515697348 0.24450713396072388 0.9791 0.9791 0.9791 0.9791
37
- 36 19:00:48 0.0000 0.25360077463430586 0.2530966103076935 0.9788 0.9788 0.9788 0.9788
38
- 37 19:08:22 0.0000 0.26495934852662506 0.2624962031841278 0.9781 0.9781 0.9781 0.9781
39
- 38 19:15:52 0.0000 0.25416150340144184 0.24244999885559082 0.9788 0.9788 0.9788 0.9788
40
- 39 19:23:23 0.0000 0.257929350459372 0.24615894258022308 0.9798 0.9798 0.9798 0.9798
41
- 40 19:30:54 0.0000 0.25402286565305776 0.2575598359107971 0.9791 0.9791 0.9791 0.9791
42
- 41 19:38:24 0.0000 0.2508873656720227 0.25127604603767395 0.9786 0.9786 0.9786 0.9786
43
- 42 19:45:54 0.0000 0.2584042182083517 0.2509002983570099 0.9776 0.9776 0.9776 0.9776
44
- 43 19:53:27 0.0000 0.2535730162199338 0.25337928533554077 0.9784 0.9784 0.9784 0.9784
45
- 44 20:01:00 0.0000 0.24615347105615198 0.25915977358818054 0.9784 0.9784 0.9784 0.9784
46
- 45 20:08:32 0.0000 0.2502548443814474 0.2550533413887024 0.9788 0.9788 0.9788 0.9788
47
- 46 20:16:06 0.0000 0.24752661908553505 0.2502936124801636 0.9796 0.9796 0.9796 0.9796
48
- 47 20:23:38 0.0000 0.25003396999949856 0.25260353088378906 0.9788 0.9788 0.9788 0.9788
49
- 48 20:31:11 0.0000 0.2562181207417887 0.2517630159854889 0.9793 0.9793 0.9793 0.9793
50
- 49 20:38:45 0.0000 0.2479874323703076 0.25181668996810913 0.9786 0.9786 0.9786 0.9786
51
- 50 20:46:19 0.0000 0.2503007775652108 0.2513697147369385 0.9784 0.9784 0.9784 0.9784
 
1
  EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
2
+ 1 13:29:52 0.0000 1.7334235176429078 0.3509514629840851 0.9331 0.9331 0.9331 0.9331
3
+ 2 13:38:31 0.0000 0.5678914814638107 0.23018118739128113 0.9562 0.9562 0.9562 0.9562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f0a588d54c3890fc011cec47b5e3b8569e579ef1edd8ca48b0c688cf1e0fd14
3
- size 1129934059
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a3e480a68c134d5e7472d163b3430db9b146ed211912fb183a11d40d3a1d542
3
+ size 1129933867
training.log CHANGED
@@ -1,5 +1,5 @@
1
- 2023-07-26 14:15:51,620 ----------------------------------------------------------------------------------------------------
2
- 2023-07-26 14:15:51,621 Model: "SequenceTagger(
3
  (embeddings): TransformerWordEmbeddings(
4
  (model): XLMRobertaModel(
5
  (embeddings): XLMRobertaEmbeddings(
@@ -313,785 +313,54 @@
313
  (loss_function): ViterbiLoss()
314
  (crf): CRF()
315
  )"
316
- 2023-07-26 14:15:51,622 ----------------------------------------------------------------------------------------------------
317
- 2023-07-26 14:15:51,622 Corpus: "Corpus: 7767 train + 409 dev + 0 test sentences"
318
- 2023-07-26 14:15:51,622 ----------------------------------------------------------------------------------------------------
319
- 2023-07-26 14:15:51,622 Parameters:
320
- 2023-07-26 14:15:51,622 - learning_rate: "0.000050"
321
- 2023-07-26 14:15:51,622 - mini_batch_size: "32"
322
- 2023-07-26 14:15:51,622 - patience: "3"
323
- 2023-07-26 14:15:51,622 - anneal_factor: "0.5"
324
- 2023-07-26 14:15:51,622 - max_epochs: "50"
325
- 2023-07-26 14:15:51,622 - shuffle: "True"
326
- 2023-07-26 14:15:51,622 - train_with_dev: "False"
327
- 2023-07-26 14:15:51,622 - batch_growth_annealing: "False"
328
- 2023-07-26 14:15:51,622 ----------------------------------------------------------------------------------------------------
329
- 2023-07-26 14:15:51,622 Model training base path: "/scratch/skulick/ppchy-11-pos/xlmb-ck05-yid1/split_final/train"
330
- 2023-07-26 14:15:51,623 ----------------------------------------------------------------------------------------------------
331
- 2023-07-26 14:15:51,623 Device: cuda:0
332
- 2023-07-26 14:15:51,623 ----------------------------------------------------------------------------------------------------
333
- 2023-07-26 14:15:51,623 Embeddings storage mode: none
334
- 2023-07-26 14:15:51,623 ----------------------------------------------------------------------------------------------------
335
- 2023-07-26 14:16:43,612 epoch 1 - iter 24/243 - loss 5.78182875 - time (sec): 51.99 - samples/sec: 143.68 - lr: 0.000001
336
- 2023-07-26 14:17:33,864 epoch 1 - iter 48/243 - loss 5.72562134 - time (sec): 102.24 - samples/sec: 145.85 - lr: 0.000002
337
- 2023-07-26 14:18:18,655 epoch 1 - iter 72/243 - loss 5.61608578 - time (sec): 147.03 - samples/sec: 153.94 - lr: 0.000003
338
- 2023-07-26 14:19:03,423 epoch 1 - iter 96/243 - loss 5.47788448 - time (sec): 191.80 - samples/sec: 157.84 - lr: 0.000004
339
- 2023-07-26 14:19:48,078 epoch 1 - iter 120/243 - loss 5.26991238 - time (sec): 236.45 - samples/sec: 160.79 - lr: 0.000005
340
- 2023-07-26 14:20:32,722 epoch 1 - iter 144/243 - loss 5.07404788 - time (sec): 281.10 - samples/sec: 162.52 - lr: 0.000006
341
- 2023-07-26 14:21:18,887 epoch 1 - iter 168/243 - loss 4.86972776 - time (sec): 327.26 - samples/sec: 164.17 - lr: 0.000007
342
- 2023-07-26 14:22:07,928 epoch 1 - iter 192/243 - loss 4.66109804 - time (sec): 376.31 - samples/sec: 164.20 - lr: 0.000008
343
- 2023-07-26 14:22:56,658 epoch 1 - iter 216/243 - loss 4.44788101 - time (sec): 425.04 - samples/sec: 163.40 - lr: 0.000009
344
- 2023-07-26 14:23:45,642 epoch 1 - iter 240/243 - loss 4.23693631 - time (sec): 474.02 - samples/sec: 163.81 - lr: 0.000010
345
- 2023-07-26 14:23:51,273 ----------------------------------------------------------------------------------------------------
346
- 2023-07-26 14:23:51,273 EPOCH 1 done: loss 4.2141 - lr 0.000010
347
- 2023-07-26 14:23:53,076 Evaluating as a multi-label problem: False
348
- 2023-07-26 14:23:53,119 DEV : loss 1.6606154441833496 - f1-score (micro avg) 0.7017
349
- 2023-07-26 14:23:53,129 saving best model
350
- 2023-07-26 14:23:55,463 ----------------------------------------------------------------------------------------------------
351
- 2023-07-26 14:24:39,905 epoch 2 - iter 24/243 - loss 1.93643008 - time (sec): 44.44 - samples/sec: 172.43 - lr: 0.000011
352
- 2023-07-26 14:25:24,584 epoch 2 - iter 48/243 - loss 1.80170810 - time (sec): 89.12 - samples/sec: 174.28 - lr: 0.000012
353
- 2023-07-26 14:26:09,249 epoch 2 - iter 72/243 - loss 1.68553020 - time (sec): 133.79 - samples/sec: 174.97 - lr: 0.000013
354
- 2023-07-26 14:26:53,935 epoch 2 - iter 96/243 - loss 1.59018149 - time (sec): 178.47 - samples/sec: 175.90 - lr: 0.000014
355
- 2023-07-26 14:27:38,417 epoch 2 - iter 120/243 - loss 1.51168641 - time (sec): 222.95 - samples/sec: 176.47 - lr: 0.000015
356
- 2023-07-26 14:28:23,238 epoch 2 - iter 144/243 - loss 1.44496232 - time (sec): 267.77 - samples/sec: 176.24 - lr: 0.000016
357
- 2023-07-26 14:29:07,485 epoch 2 - iter 168/243 - loss 1.38343183 - time (sec): 312.02 - samples/sec: 175.74 - lr: 0.000017
358
- 2023-07-26 14:29:51,869 epoch 2 - iter 192/243 - loss 1.32848150 - time (sec): 356.41 - samples/sec: 175.52 - lr: 0.000018
359
- 2023-07-26 14:30:36,135 epoch 2 - iter 216/243 - loss 1.28678633 - time (sec): 400.67 - samples/sec: 174.52 - lr: 0.000019
360
- 2023-07-26 14:31:20,592 epoch 2 - iter 240/243 - loss 1.24063251 - time (sec): 445.13 - samples/sec: 174.61 - lr: 0.000020
361
- 2023-07-26 14:31:25,683 ----------------------------------------------------------------------------------------------------
362
- 2023-07-26 14:31:25,683 EPOCH 2 done: loss 1.2362 - lr 0.000020
363
- 2023-07-26 14:31:27,442 Evaluating as a multi-label problem: False
364
- 2023-07-26 14:31:27,484 DEV : loss 0.4555579721927643 - f1-score (micro avg) 0.9132
365
- 2023-07-26 14:31:27,494 saving best model
366
- 2023-07-26 14:31:30,740 ----------------------------------------------------------------------------------------------------
367
- 2023-07-26 14:32:15,374 epoch 3 - iter 24/243 - loss 0.80478615 - time (sec): 44.63 - samples/sec: 181.44 - lr: 0.000021
368
- 2023-07-26 14:32:59,817 epoch 3 - iter 48/243 - loss 0.76412570 - time (sec): 89.08 - samples/sec: 179.04 - lr: 0.000022
369
- 2023-07-26 14:33:44,386 epoch 3 - iter 72/243 - loss 0.74620943 - time (sec): 133.64 - samples/sec: 176.74 - lr: 0.000023
370
- 2023-07-26 14:34:28,788 epoch 3 - iter 96/243 - loss 0.72917808 - time (sec): 178.05 - samples/sec: 175.92 - lr: 0.000024
371
- 2023-07-26 14:35:13,386 epoch 3 - iter 120/243 - loss 0.72089137 - time (sec): 222.64 - samples/sec: 176.15 - lr: 0.000025
372
- 2023-07-26 14:35:57,934 epoch 3 - iter 144/243 - loss 0.70075087 - time (sec): 267.19 - samples/sec: 175.65 - lr: 0.000026
373
- 2023-07-26 14:36:42,264 epoch 3 - iter 168/243 - loss 0.68433087 - time (sec): 311.52 - samples/sec: 174.95 - lr: 0.000027
374
- 2023-07-26 14:37:26,778 epoch 3 - iter 192/243 - loss 0.67039041 - time (sec): 356.04 - samples/sec: 175.14 - lr: 0.000028
375
- 2023-07-26 14:38:11,135 epoch 3 - iter 216/243 - loss 0.66061953 - time (sec): 400.39 - samples/sec: 175.13 - lr: 0.000029
376
- 2023-07-26 14:38:55,563 epoch 3 - iter 240/243 - loss 0.65094446 - time (sec): 444.82 - samples/sec: 174.77 - lr: 0.000030
377
- 2023-07-26 14:39:00,596 ----------------------------------------------------------------------------------------------------
378
- 2023-07-26 14:39:00,596 EPOCH 3 done: loss 0.6499 - lr 0.000030
379
- 2023-07-26 14:39:02,334 Evaluating as a multi-label problem: False
380
- 2023-07-26 14:39:02,376 DEV : loss 0.247285857796669 - f1-score (micro avg) 0.9518
381
- 2023-07-26 14:39:02,385 saving best model
382
- 2023-07-26 14:39:05,730 ----------------------------------------------------------------------------------------------------
383
- 2023-07-26 14:39:50,093 epoch 4 - iter 24/243 - loss 0.55472967 - time (sec): 44.36 - samples/sec: 176.66 - lr: 0.000031
384
- 2023-07-26 14:40:34,562 epoch 4 - iter 48/243 - loss 0.52360637 - time (sec): 88.83 - samples/sec: 175.54 - lr: 0.000032
385
- 2023-07-26 14:41:19,307 epoch 4 - iter 72/243 - loss 0.51655667 - time (sec): 133.58 - samples/sec: 174.54 - lr: 0.000033
386
- 2023-07-26 14:42:05,307 epoch 4 - iter 96/243 - loss 0.51891961 - time (sec): 179.58 - samples/sec: 173.86 - lr: 0.000034
387
- 2023-07-26 14:42:54,480 epoch 4 - iter 120/243 - loss 0.50631556 - time (sec): 228.75 - samples/sec: 171.40 - lr: 0.000035
388
- 2023-07-26 14:43:43,446 epoch 4 - iter 144/243 - loss 0.50459545 - time (sec): 277.72 - samples/sec: 168.74 - lr: 0.000036
389
- 2023-07-26 14:44:32,519 epoch 4 - iter 168/243 - loss 0.50045519 - time (sec): 326.79 - samples/sec: 167.35 - lr: 0.000037
390
- 2023-07-26 14:45:21,599 epoch 4 - iter 192/243 - loss 0.49446570 - time (sec): 375.87 - samples/sec: 166.24 - lr: 0.000038
391
- 2023-07-26 14:46:10,542 epoch 4 - iter 216/243 - loss 0.49218271 - time (sec): 424.81 - samples/sec: 165.38 - lr: 0.000039
392
- 2023-07-26 14:46:59,284 epoch 4 - iter 240/243 - loss 0.49159525 - time (sec): 473.55 - samples/sec: 164.09 - lr: 0.000040
393
- 2023-07-26 14:47:04,893 ----------------------------------------------------------------------------------------------------
394
- 2023-07-26 14:47:04,893 EPOCH 4 done: loss 0.4917 - lr 0.000040
395
- 2023-07-26 14:47:06,684 Evaluating as a multi-label problem: False
396
- 2023-07-26 14:47:06,726 DEV : loss 0.18006576597690582 - f1-score (micro avg) 0.9648
397
- 2023-07-26 14:47:06,736 saving best model
398
- 2023-07-26 14:47:10,014 ----------------------------------------------------------------------------------------------------
399
- 2023-07-26 14:47:54,932 epoch 5 - iter 24/243 - loss 0.45058356 - time (sec): 44.92 - samples/sec: 173.25 - lr: 0.000041
400
- 2023-07-26 14:48:41,950 epoch 5 - iter 48/243 - loss 0.43329992 - time (sec): 91.94 - samples/sec: 169.29 - lr: 0.000042
401
- 2023-07-26 14:49:33,377 epoch 5 - iter 72/243 - loss 0.43373609 - time (sec): 143.36 - samples/sec: 163.90 - lr: 0.000043
402
- 2023-07-26 14:50:24,178 epoch 5 - iter 96/243 - loss 0.43090189 - time (sec): 194.16 - samples/sec: 160.68 - lr: 0.000044
403
- 2023-07-26 14:51:14,713 epoch 5 - iter 120/243 - loss 0.42730629 - time (sec): 244.70 - samples/sec: 158.36 - lr: 0.000045
404
- 2023-07-26 14:52:05,519 epoch 5 - iter 144/243 - loss 0.42510607 - time (sec): 295.50 - samples/sec: 157.71 - lr: 0.000046
405
- 2023-07-26 14:52:56,269 epoch 5 - iter 168/243 - loss 0.42354677 - time (sec): 346.25 - samples/sec: 157.30 - lr: 0.000047
406
- 2023-07-26 14:53:45,024 epoch 5 - iter 192/243 - loss 0.42562343 - time (sec): 395.01 - samples/sec: 157.75 - lr: 0.000048
407
- 2023-07-26 14:54:29,614 epoch 5 - iter 216/243 - loss 0.42329549 - time (sec): 439.60 - samples/sec: 159.49 - lr: 0.000049
408
- 2023-07-26 14:55:14,101 epoch 5 - iter 240/243 - loss 0.42313631 - time (sec): 484.09 - samples/sec: 160.63 - lr: 0.000050
409
- 2023-07-26 14:55:19,182 ----------------------------------------------------------------------------------------------------
410
- 2023-07-26 14:55:19,183 EPOCH 5 done: loss 0.4224 - lr 0.000050
411
- 2023-07-26 14:55:20,964 Evaluating as a multi-label problem: False
412
- 2023-07-26 14:55:21,010 DEV : loss 0.15854212641716003 - f1-score (micro avg) 0.9715
413
- 2023-07-26 14:55:21,021 saving best model
414
- 2023-07-26 14:55:24,373 ----------------------------------------------------------------------------------------------------
415
- 2023-07-26 14:56:09,000 epoch 6 - iter 24/243 - loss 0.38322411 - time (sec): 44.63 - samples/sec: 170.24 - lr: 0.000050
416
- 2023-07-26 14:56:53,917 epoch 6 - iter 48/243 - loss 0.38879490 - time (sec): 89.54 - samples/sec: 173.84 - lr: 0.000050
417
- 2023-07-26 14:57:38,715 epoch 6 - iter 72/243 - loss 0.39501775 - time (sec): 134.34 - samples/sec: 173.59 - lr: 0.000050
418
- 2023-07-26 14:58:23,414 epoch 6 - iter 96/243 - loss 0.39125526 - time (sec): 179.04 - samples/sec: 172.72 - lr: 0.000050
419
- 2023-07-26 14:59:08,294 epoch 6 - iter 120/243 - loss 0.38810381 - time (sec): 223.92 - samples/sec: 173.39 - lr: 0.000049
420
- 2023-07-26 14:59:53,048 epoch 6 - iter 144/243 - loss 0.38859919 - time (sec): 268.67 - samples/sec: 173.20 - lr: 0.000049
421
- 2023-07-26 15:00:37,709 epoch 6 - iter 168/243 - loss 0.39183603 - time (sec): 313.34 - samples/sec: 172.54 - lr: 0.000049
422
- 2023-07-26 15:01:22,620 epoch 6 - iter 192/243 - loss 0.39172498 - time (sec): 358.25 - samples/sec: 173.10 - lr: 0.000049
423
- 2023-07-26 15:02:07,658 epoch 6 - iter 216/243 - loss 0.38755663 - time (sec): 403.28 - samples/sec: 173.50 - lr: 0.000049
424
- 2023-07-26 15:02:52,481 epoch 6 - iter 240/243 - loss 0.38859503 - time (sec): 448.11 - samples/sec: 173.42 - lr: 0.000049
425
- 2023-07-26 15:02:57,605 ----------------------------------------------------------------------------------------------------
426
- 2023-07-26 15:02:57,605 EPOCH 6 done: loss 0.3889 - lr 0.000049
427
- 2023-07-26 15:02:59,359 Evaluating as a multi-label problem: False
428
- 2023-07-26 15:02:59,401 DEV : loss 0.1478930115699768 - f1-score (micro avg) 0.9729
429
- 2023-07-26 15:02:59,411 saving best model
430
- 2023-07-26 15:03:02,642 ----------------------------------------------------------------------------------------------------
431
- 2023-07-26 15:03:47,204 epoch 7 - iter 24/243 - loss 0.37119833 - time (sec): 44.56 - samples/sec: 170.57 - lr: 0.000049
432
- 2023-07-26 15:04:32,257 epoch 7 - iter 48/243 - loss 0.34925497 - time (sec): 89.61 - samples/sec: 170.90 - lr: 0.000049
433
- 2023-07-26 15:05:17,152 epoch 7 - iter 72/243 - loss 0.36339135 - time (sec): 134.51 - samples/sec: 170.74 - lr: 0.000049
434
- 2023-07-26 15:06:02,168 epoch 7 - iter 96/243 - loss 0.36053250 - time (sec): 179.53 - samples/sec: 172.30 - lr: 0.000048
435
- 2023-07-26 15:06:47,283 epoch 7 - iter 120/243 - loss 0.36487615 - time (sec): 224.64 - samples/sec: 173.25 - lr: 0.000048
436
- 2023-07-26 15:07:32,276 epoch 7 - iter 144/243 - loss 0.36319947 - time (sec): 269.63 - samples/sec: 173.36 - lr: 0.000048
437
- 2023-07-26 15:08:17,184 epoch 7 - iter 168/243 - loss 0.36321272 - time (sec): 314.54 - samples/sec: 173.50 - lr: 0.000048
438
- 2023-07-26 15:09:02,085 epoch 7 - iter 192/243 - loss 0.36447693 - time (sec): 359.44 - samples/sec: 173.23 - lr: 0.000048
439
- 2023-07-26 15:09:51,228 epoch 7 - iter 216/243 - loss 0.36744951 - time (sec): 408.59 - samples/sec: 171.35 - lr: 0.000048
440
- 2023-07-26 15:10:40,287 epoch 7 - iter 240/243 - loss 0.36634157 - time (sec): 457.64 - samples/sec: 169.91 - lr: 0.000048
441
- 2023-07-26 15:10:45,862 ----------------------------------------------------------------------------------------------------
442
- 2023-07-26 15:10:45,863 EPOCH 7 done: loss 0.3670 - lr 0.000048
443
- 2023-07-26 15:10:47,681 Evaluating as a multi-label problem: False
444
- 2023-07-26 15:10:47,726 DEV : loss 0.14240729808807373 - f1-score (micro avg) 0.9717
445
- 2023-07-26 15:10:47,736 ----------------------------------------------------------------------------------------------------
446
- 2023-07-26 15:11:32,421 epoch 8 - iter 24/243 - loss 0.35991738 - time (sec): 44.68 - samples/sec: 171.16 - lr: 0.000048
447
- 2023-07-26 15:12:16,856 epoch 8 - iter 48/243 - loss 0.34897131 - time (sec): 89.12 - samples/sec: 171.01 - lr: 0.000048
448
- 2023-07-26 15:13:01,243 epoch 8 - iter 72/243 - loss 0.34258107 - time (sec): 133.51 - samples/sec: 171.82 - lr: 0.000047
449
- 2023-07-26 15:13:45,557 epoch 8 - iter 96/243 - loss 0.34457191 - time (sec): 177.82 - samples/sec: 171.15 - lr: 0.000047
450
- 2023-07-26 15:14:33,081 epoch 8 - iter 120/243 - loss 0.34507195 - time (sec): 225.34 - samples/sec: 168.78 - lr: 0.000047
451
- 2023-07-26 15:15:23,807 epoch 8 - iter 144/243 - loss 0.34828898 - time (sec): 276.07 - samples/sec: 167.52 - lr: 0.000047
452
- 2023-07-26 15:16:16,673 epoch 8 - iter 168/243 - loss 0.34938445 - time (sec): 328.94 - samples/sec: 163.83 - lr: 0.000047
453
- 2023-07-26 15:17:08,647 epoch 8 - iter 192/243 - loss 0.34862273 - time (sec): 380.91 - samples/sec: 162.58 - lr: 0.000047
454
- 2023-07-26 15:17:59,292 epoch 8 - iter 216/243 - loss 0.34977990 - time (sec): 431.56 - samples/sec: 161.50 - lr: 0.000047
455
- 2023-07-26 15:18:48,823 epoch 8 - iter 240/243 - loss 0.34875804 - time (sec): 481.09 - samples/sec: 161.18 - lr: 0.000047
456
- 2023-07-26 15:18:54,694 ----------------------------------------------------------------------------------------------------
457
- 2023-07-26 15:18:54,694 EPOCH 8 done: loss 0.3496 - lr 0.000047
458
- 2023-07-26 15:18:56,484 Evaluating as a multi-label problem: False
459
- 2023-07-26 15:18:56,526 DEV : loss 0.13401205837726593 - f1-score (micro avg) 0.9752
460
- 2023-07-26 15:18:56,536 saving best model
461
- 2023-07-26 15:18:59,887 ----------------------------------------------------------------------------------------------------
462
- 2023-07-26 15:19:45,875 epoch 9 - iter 24/243 - loss 0.33211277 - time (sec): 45.99 - samples/sec: 171.57 - lr: 0.000047
463
- 2023-07-26 15:20:33,843 epoch 9 - iter 48/243 - loss 0.33508629 - time (sec): 93.96 - samples/sec: 171.82 - lr: 0.000046
464
- 2023-07-26 15:21:26,038 epoch 9 - iter 72/243 - loss 0.32662985 - time (sec): 146.15 - samples/sec: 162.61 - lr: 0.000046
465
- 2023-07-26 15:22:17,368 epoch 9 - iter 96/243 - loss 0.32958645 - time (sec): 197.48 - samples/sec: 159.51 - lr: 0.000046
466
- 2023-07-26 15:23:08,277 epoch 9 - iter 120/243 - loss 0.32364185 - time (sec): 248.39 - samples/sec: 157.62 - lr: 0.000046
467
- 2023-07-26 15:23:59,015 epoch 9 - iter 144/243 - loss 0.32701429 - time (sec): 299.13 - samples/sec: 156.28 - lr: 0.000046
468
- 2023-07-26 15:24:49,851 epoch 9 - iter 168/243 - loss 0.33017416 - time (sec): 349.96 - samples/sec: 155.73 - lr: 0.000046
469
- 2023-07-26 15:25:40,830 epoch 9 - iter 192/243 - loss 0.33104299 - time (sec): 400.94 - samples/sec: 156.11 - lr: 0.000046
470
- 2023-07-26 15:26:30,943 epoch 9 - iter 216/243 - loss 0.33454509 - time (sec): 451.06 - samples/sec: 155.81 - lr: 0.000046
471
- 2023-07-26 15:27:20,164 epoch 9 - iter 240/243 - loss 0.33386278 - time (sec): 500.28 - samples/sec: 155.37 - lr: 0.000046
472
- 2023-07-26 15:27:25,781 ----------------------------------------------------------------------------------------------------
473
- 2023-07-26 15:27:25,782 EPOCH 9 done: loss 0.3329 - lr 0.000046
474
- 2023-07-26 15:27:27,595 Evaluating as a multi-label problem: False
475
- 2023-07-26 15:27:27,637 DEV : loss 0.14190562069416046 - f1-score (micro avg) 0.9764
476
- 2023-07-26 15:27:27,647 saving best model
477
- 2023-07-26 15:27:31,002 ----------------------------------------------------------------------------------------------------
478
- 2023-07-26 15:28:16,088 epoch 10 - iter 24/243 - loss 0.34002265 - time (sec): 45.09 - samples/sec: 170.28 - lr: 0.000045
479
- 2023-07-26 15:29:00,810 epoch 10 - iter 48/243 - loss 0.33540108 - time (sec): 89.81 - samples/sec: 172.64 - lr: 0.000045
480
- 2023-07-26 15:29:45,833 epoch 10 - iter 72/243 - loss 0.33399184 - time (sec): 134.83 - samples/sec: 173.50 - lr: 0.000045
481
- 2023-07-26 15:30:30,533 epoch 10 - iter 96/243 - loss 0.32469492 - time (sec): 179.53 - samples/sec: 173.83 - lr: 0.000045
482
- 2023-07-26 15:31:15,030 epoch 10 - iter 120/243 - loss 0.32910415 - time (sec): 224.03 - samples/sec: 173.44 - lr: 0.000045
483
- 2023-07-26 15:31:59,646 epoch 10 - iter 144/243 - loss 0.32899582 - time (sec): 268.64 - samples/sec: 173.64 - lr: 0.000045
484
- 2023-07-26 15:32:44,609 epoch 10 - iter 168/243 - loss 0.33093813 - time (sec): 313.61 - samples/sec: 174.48 - lr: 0.000045
485
- 2023-07-26 15:33:29,306 epoch 10 - iter 192/243 - loss 0.33208597 - time (sec): 358.30 - samples/sec: 173.78 - lr: 0.000045
486
- 2023-07-26 15:34:14,223 epoch 10 - iter 216/243 - loss 0.33175324 - time (sec): 403.22 - samples/sec: 174.07 - lr: 0.000045
487
- 2023-07-26 15:34:58,900 epoch 10 - iter 240/243 - loss 0.33262740 - time (sec): 447.90 - samples/sec: 173.56 - lr: 0.000044
488
- 2023-07-26 15:35:04,010 ----------------------------------------------------------------------------------------------------
489
- 2023-07-26 15:35:04,010 EPOCH 10 done: loss 0.3321 - lr 0.000044
490
- 2023-07-26 15:35:06,264 Evaluating as a multi-label problem: False
491
- 2023-07-26 15:35:06,306 DEV : loss 0.1481310874223709 - f1-score (micro avg) 0.9734
492
- 2023-07-26 15:35:06,316 ----------------------------------------------------------------------------------------------------
493
- 2023-07-26 15:35:51,091 epoch 11 - iter 24/243 - loss 0.33230355 - time (sec): 44.77 - samples/sec: 172.33 - lr: 0.000044
494
- 2023-07-26 15:36:36,125 epoch 11 - iter 48/243 - loss 0.32441123 - time (sec): 89.81 - samples/sec: 170.71 - lr: 0.000044
495
- 2023-07-26 15:37:25,279 epoch 11 - iter 72/243 - loss 0.32514673 - time (sec): 138.96 - samples/sec: 167.78 - lr: 0.000044
496
- 2023-07-26 15:38:10,516 epoch 11 - iter 96/243 - loss 0.32235685 - time (sec): 184.20 - samples/sec: 169.57 - lr: 0.000044
497
- 2023-07-26 15:38:58,115 epoch 11 - iter 120/243 - loss 0.31705674 - time (sec): 231.80 - samples/sec: 167.98 - lr: 0.000044
498
- 2023-07-26 15:39:45,447 epoch 11 - iter 144/243 - loss 0.31351156 - time (sec): 279.13 - samples/sec: 166.74 - lr: 0.000044
499
- 2023-07-26 15:40:32,843 epoch 11 - iter 168/243 - loss 0.31453443 - time (sec): 326.53 - samples/sec: 166.47 - lr: 0.000044
500
- 2023-07-26 15:41:20,505 epoch 11 - iter 192/243 - loss 0.32048855 - time (sec): 374.19 - samples/sec: 166.74 - lr: 0.000044
501
- 2023-07-26 15:42:08,594 epoch 11 - iter 216/243 - loss 0.31914298 - time (sec): 422.28 - samples/sec: 166.07 - lr: 0.000043
502
- 2023-07-26 15:42:58,015 epoch 11 - iter 240/243 - loss 0.31938530 - time (sec): 471.70 - samples/sec: 164.83 - lr: 0.000043
503
- 2023-07-26 15:43:03,640 ----------------------------------------------------------------------------------------------------
504
- 2023-07-26 15:43:03,640 EPOCH 11 done: loss 0.3201 - lr 0.000043
505
- 2023-07-26 15:43:05,491 Evaluating as a multi-label problem: False
506
- 2023-07-26 15:43:05,538 DEV : loss 0.16022486984729767 - f1-score (micro avg) 0.9744
507
- 2023-07-26 15:43:05,549 ----------------------------------------------------------------------------------------------------
508
- 2023-07-26 15:43:51,010 epoch 12 - iter 24/243 - loss 0.30634651 - time (sec): 45.46 - samples/sec: 169.22 - lr: 0.000043
509
- 2023-07-26 15:44:35,828 epoch 12 - iter 48/243 - loss 0.32055500 - time (sec): 90.28 - samples/sec: 169.40 - lr: 0.000043
510
- 2023-07-26 15:45:20,616 epoch 12 - iter 72/243 - loss 0.31591461 - time (sec): 135.07 - samples/sec: 170.20 - lr: 0.000043
511
- 2023-07-26 15:46:05,323 epoch 12 - iter 96/243 - loss 0.31720616 - time (sec): 179.77 - samples/sec: 171.25 - lr: 0.000043
512
- 2023-07-26 15:46:50,172 epoch 12 - iter 120/243 - loss 0.31877634 - time (sec): 224.62 - samples/sec: 172.25 - lr: 0.000043
513
- 2023-07-26 15:47:34,948 epoch 12 - iter 144/243 - loss 0.31817728 - time (sec): 269.40 - samples/sec: 172.60 - lr: 0.000043
514
- 2023-07-26 15:48:19,648 epoch 12 - iter 168/243 - loss 0.31409341 - time (sec): 314.10 - samples/sec: 173.20 - lr: 0.000043
515
- 2023-07-26 15:49:04,450 epoch 12 - iter 192/243 - loss 0.31475214 - time (sec): 358.90 - samples/sec: 172.72 - lr: 0.000042
516
- 2023-07-26 15:49:49,156 epoch 12 - iter 216/243 - loss 0.31439205 - time (sec): 403.61 - samples/sec: 173.13 - lr: 0.000042
517
- 2023-07-26 15:50:33,925 epoch 12 - iter 240/243 - loss 0.31462372 - time (sec): 448.38 - samples/sec: 173.38 - lr: 0.000042
518
- 2023-07-26 15:50:39,009 ----------------------------------------------------------------------------------------------------
519
- 2023-07-26 15:50:39,009 EPOCH 12 done: loss 0.3146 - lr 0.000042
520
- 2023-07-26 15:50:40,760 Evaluating as a multi-label problem: False
521
- 2023-07-26 15:50:40,803 DEV : loss 0.17038877308368683 - f1-score (micro avg) 0.9764
522
- 2023-07-26 15:50:40,813 ----------------------------------------------------------------------------------------------------
523
- 2023-07-26 15:51:25,228 epoch 13 - iter 24/243 - loss 0.30871471 - time (sec): 44.42 - samples/sec: 169.20 - lr: 0.000042
524
- 2023-07-26 15:52:09,735 epoch 13 - iter 48/243 - loss 0.30951571 - time (sec): 88.92 - samples/sec: 169.92 - lr: 0.000042
525
- 2023-07-26 15:52:54,713 epoch 13 - iter 72/243 - loss 0.30146253 - time (sec): 133.90 - samples/sec: 170.69 - lr: 0.000042
526
- 2023-07-26 15:53:39,688 epoch 13 - iter 96/243 - loss 0.29818491 - time (sec): 178.88 - samples/sec: 171.59 - lr: 0.000042
527
- 2023-07-26 15:54:24,347 epoch 13 - iter 120/243 - loss 0.29829818 - time (sec): 223.53 - samples/sec: 171.45 - lr: 0.000042
528
- 2023-07-26 15:55:09,312 epoch 13 - iter 144/243 - loss 0.31111593 - time (sec): 268.50 - samples/sec: 171.76 - lr: 0.000042
529
- 2023-07-26 15:55:54,240 epoch 13 - iter 168/243 - loss 0.31147702 - time (sec): 313.43 - samples/sec: 171.94 - lr: 0.000041
530
- 2023-07-26 15:56:39,090 epoch 13 - iter 192/243 - loss 0.30976085 - time (sec): 358.28 - samples/sec: 172.90 - lr: 0.000041
531
- 2023-07-26 15:57:24,278 epoch 13 - iter 216/243 - loss 0.30904370 - time (sec): 403.46 - samples/sec: 173.00 - lr: 0.000041
532
- 2023-07-26 15:58:09,133 epoch 13 - iter 240/243 - loss 0.30572837 - time (sec): 448.32 - samples/sec: 173.40 - lr: 0.000041
533
- 2023-07-26 15:58:14,202 ----------------------------------------------------------------------------------------------------
534
- 2023-07-26 15:58:14,202 EPOCH 13 done: loss 0.3056 - lr 0.000041
535
- 2023-07-26 15:58:15,991 Evaluating as a multi-label problem: False
536
- 2023-07-26 15:58:16,034 DEV : loss 0.16180633008480072 - f1-score (micro avg) 0.9766
537
- 2023-07-26 15:58:16,044 saving best model
538
- 2023-07-26 15:58:19,355 ----------------------------------------------------------------------------------------------------
539
- 2023-07-26 15:59:06,668 epoch 14 - iter 24/243 - loss 0.28577045 - time (sec): 47.31 - samples/sec: 164.50 - lr: 0.000041
540
- 2023-07-26 15:59:56,998 epoch 14 - iter 48/243 - loss 0.28369661 - time (sec): 97.64 - samples/sec: 158.27 - lr: 0.000041
541
- 2023-07-26 16:00:51,211 epoch 14 - iter 72/243 - loss 0.29071442 - time (sec): 151.86 - samples/sec: 153.53 - lr: 0.000041
542
- 2023-07-26 16:01:43,557 epoch 14 - iter 96/243 - loss 0.29219267 - time (sec): 204.20 - samples/sec: 154.01 - lr: 0.000041
543
- 2023-07-26 16:02:32,810 epoch 14 - iter 120/243 - loss 0.29452027 - time (sec): 253.45 - samples/sec: 154.42 - lr: 0.000041
544
- 2023-07-26 16:03:22,073 epoch 14 - iter 144/243 - loss 0.28860385 - time (sec): 302.72 - samples/sec: 154.60 - lr: 0.000040
545
- 2023-07-26 16:04:11,432 epoch 14 - iter 168/243 - loss 0.29040567 - time (sec): 352.08 - samples/sec: 155.10 - lr: 0.000040
546
- 2023-07-26 16:05:00,439 epoch 14 - iter 192/243 - loss 0.29057669 - time (sec): 401.08 - samples/sec: 155.56 - lr: 0.000040
547
- 2023-07-26 16:05:49,734 epoch 14 - iter 216/243 - loss 0.29351512 - time (sec): 450.38 - samples/sec: 155.77 - lr: 0.000040
548
- 2023-07-26 16:06:38,920 epoch 14 - iter 240/243 - loss 0.29475470 - time (sec): 499.56 - samples/sec: 155.77 - lr: 0.000040
549
- 2023-07-26 16:06:44,452 ----------------------------------------------------------------------------------------------------
550
- 2023-07-26 16:06:44,452 EPOCH 14 done: loss 0.2946 - lr 0.000040
551
- 2023-07-26 16:06:46,282 Evaluating as a multi-label problem: False
552
- 2023-07-26 16:06:46,328 DEV : loss 0.1961415857076645 - f1-score (micro avg) 0.9729
553
- 2023-07-26 16:06:46,338 ----------------------------------------------------------------------------------------------------
554
- 2023-07-26 16:07:31,298 epoch 15 - iter 24/243 - loss 0.32628632 - time (sec): 44.96 - samples/sec: 171.98 - lr: 0.000040
555
- 2023-07-26 16:08:21,094 epoch 15 - iter 48/243 - loss 0.30408958 - time (sec): 94.76 - samples/sec: 164.10 - lr: 0.000040
556
- 2023-07-26 16:09:15,364 epoch 15 - iter 72/243 - loss 0.29750206 - time (sec): 149.03 - samples/sec: 157.51 - lr: 0.000040
557
- 2023-07-26 16:10:06,024 epoch 15 - iter 96/243 - loss 0.29760832 - time (sec): 199.69 - samples/sec: 155.97 - lr: 0.000040
558
- 2023-07-26 16:10:56,205 epoch 15 - iter 120/243 - loss 0.29974418 - time (sec): 249.87 - samples/sec: 155.76 - lr: 0.000039
559
- 2023-07-26 16:11:43,301 epoch 15 - iter 144/243 - loss 0.29904887 - time (sec): 296.96 - samples/sec: 157.05 - lr: 0.000039
560
- 2023-07-26 16:12:31,170 epoch 15 - iter 168/243 - loss 0.29894209 - time (sec): 344.83 - samples/sec: 157.73 - lr: 0.000039
561
- 2023-07-26 16:13:20,187 epoch 15 - iter 192/243 - loss 0.29754010 - time (sec): 393.85 - samples/sec: 157.85 - lr: 0.000039
562
- 2023-07-26 16:14:09,012 epoch 15 - iter 216/243 - loss 0.29884402 - time (sec): 442.67 - samples/sec: 157.79 - lr: 0.000039
563
- 2023-07-26 16:14:57,878 epoch 15 - iter 240/243 - loss 0.29706337 - time (sec): 491.54 - samples/sec: 158.08 - lr: 0.000039
564
- 2023-07-26 16:15:03,351 ----------------------------------------------------------------------------------------------------
565
- 2023-07-26 16:15:03,351 EPOCH 15 done: loss 0.2971 - lr 0.000039
566
- 2023-07-26 16:15:05,134 Evaluating as a multi-label problem: False
567
- 2023-07-26 16:15:05,176 DEV : loss 0.21415923535823822 - f1-score (micro avg) 0.9737
568
- 2023-07-26 16:15:05,186 ----------------------------------------------------------------------------------------------------
569
- 2023-07-26 16:15:50,049 epoch 16 - iter 24/243 - loss 0.32918671 - time (sec): 44.86 - samples/sec: 172.79 - lr: 0.000039
570
- 2023-07-26 16:16:34,768 epoch 16 - iter 48/243 - loss 0.30668793 - time (sec): 89.58 - samples/sec: 172.52 - lr: 0.000039
571
- 2023-07-26 16:17:19,891 epoch 16 - iter 72/243 - loss 0.30165600 - time (sec): 134.70 - samples/sec: 171.72 - lr: 0.000039
572
- 2023-07-26 16:18:09,624 epoch 16 - iter 96/243 - loss 0.29977956 - time (sec): 184.44 - samples/sec: 168.02 - lr: 0.000038
573
- 2023-07-26 16:18:58,935 epoch 16 - iter 120/243 - loss 0.29035278 - time (sec): 233.75 - samples/sec: 165.52 - lr: 0.000038
574
- 2023-07-26 16:19:48,358 epoch 16 - iter 144/243 - loss 0.28688344 - time (sec): 283.17 - samples/sec: 164.52 - lr: 0.000038
575
- 2023-07-26 16:20:37,728 epoch 16 - iter 168/243 - loss 0.28573744 - time (sec): 332.54 - samples/sec: 163.65 - lr: 0.000038
576
- 2023-07-26 16:21:26,994 epoch 16 - iter 192/243 - loss 0.28483557 - time (sec): 381.81 - samples/sec: 162.65 - lr: 0.000038
577
- 2023-07-26 16:22:16,480 epoch 16 - iter 216/243 - loss 0.28487700 - time (sec): 431.29 - samples/sec: 162.23 - lr: 0.000038
578
- 2023-07-26 16:23:05,837 epoch 16 - iter 240/243 - loss 0.28570848 - time (sec): 480.65 - samples/sec: 161.78 - lr: 0.000038
579
- 2023-07-26 16:23:11,437 ----------------------------------------------------------------------------------------------------
580
- 2023-07-26 16:23:11,437 EPOCH 16 done: loss 0.2858 - lr 0.000038
581
- 2023-07-26 16:23:13,234 Evaluating as a multi-label problem: False
582
- 2023-07-26 16:23:13,276 DEV : loss 0.17488490045070648 - f1-score (micro avg) 0.9764
583
- 2023-07-26 16:23:13,286 ----------------------------------------------------------------------------------------------------
584
- 2023-07-26 16:23:58,069 epoch 17 - iter 24/243 - loss 0.28223418 - time (sec): 44.78 - samples/sec: 169.35 - lr: 0.000038
585
- 2023-07-26 16:24:42,914 epoch 17 - iter 48/243 - loss 0.28773045 - time (sec): 89.63 - samples/sec: 170.29 - lr: 0.000038
586
- 2023-07-26 16:25:28,001 epoch 17 - iter 72/243 - loss 0.28949629 - time (sec): 134.72 - samples/sec: 171.86 - lr: 0.000037
587
- 2023-07-26 16:26:12,604 epoch 17 - iter 96/243 - loss 0.29081122 - time (sec): 179.32 - samples/sec: 172.97 - lr: 0.000037
588
- 2023-07-26 16:26:57,287 epoch 17 - iter 120/243 - loss 0.28910214 - time (sec): 224.00 - samples/sec: 173.28 - lr: 0.000037
589
- 2023-07-26 16:27:41,994 epoch 17 - iter 144/243 - loss 0.28813940 - time (sec): 268.71 - samples/sec: 173.98 - lr: 0.000037
590
- 2023-07-26 16:28:26,701 epoch 17 - iter 168/243 - loss 0.28649377 - time (sec): 313.42 - samples/sec: 174.08 - lr: 0.000037
591
- 2023-07-26 16:29:11,540 epoch 17 - iter 192/243 - loss 0.28690817 - time (sec): 358.25 - samples/sec: 174.44 - lr: 0.000037
592
- 2023-07-26 16:29:56,114 epoch 17 - iter 216/243 - loss 0.28529445 - time (sec): 402.83 - samples/sec: 173.78 - lr: 0.000037
593
- 2023-07-26 16:30:40,993 epoch 17 - iter 240/243 - loss 0.28495055 - time (sec): 447.71 - samples/sec: 173.50 - lr: 0.000037
594
- 2023-07-26 16:30:46,121 ----------------------------------------------------------------------------------------------------
595
- 2023-07-26 16:30:46,122 EPOCH 17 done: loss 0.2845 - lr 0.000037
596
- 2023-07-26 16:30:47,874 Evaluating as a multi-label problem: False
597
- 2023-07-26 16:30:47,918 DEV : loss 0.1961992233991623 - f1-score (micro avg) 0.9764
598
- 2023-07-26 16:30:47,928 ----------------------------------------------------------------------------------------------------
599
- 2023-07-26 16:31:33,170 epoch 18 - iter 24/243 - loss 0.28778804 - time (sec): 45.24 - samples/sec: 183.77 - lr: 0.000037
600
- 2023-07-26 16:32:17,561 epoch 18 - iter 48/243 - loss 0.28633144 - time (sec): 89.63 - samples/sec: 178.16 - lr: 0.000036
601
- 2023-07-26 16:33:02,262 epoch 18 - iter 72/243 - loss 0.28829018 - time (sec): 134.33 - samples/sec: 176.29 - lr: 0.000036
602
- 2023-07-26 16:33:47,023 epoch 18 - iter 96/243 - loss 0.28737825 - time (sec): 179.10 - samples/sec: 176.55 - lr: 0.000036
603
- 2023-07-26 16:34:31,632 epoch 18 - iter 120/243 - loss 0.28870528 - time (sec): 223.70 - samples/sec: 176.96 - lr: 0.000036
604
- 2023-07-26 16:35:16,249 epoch 18 - iter 144/243 - loss 0.28536506 - time (sec): 268.32 - samples/sec: 176.48 - lr: 0.000036
605
- 2023-07-26 16:36:01,090 epoch 18 - iter 168/243 - loss 0.28612314 - time (sec): 313.16 - samples/sec: 175.92 - lr: 0.000036
606
- 2023-07-26 16:36:46,062 epoch 18 - iter 192/243 - loss 0.28681958 - time (sec): 358.13 - samples/sec: 174.98 - lr: 0.000036
607
- 2023-07-26 16:37:31,063 epoch 18 - iter 216/243 - loss 0.28815101 - time (sec): 403.14 - samples/sec: 174.53 - lr: 0.000036
608
- 2023-07-26 16:38:19,082 epoch 18 - iter 240/243 - loss 0.28697818 - time (sec): 451.15 - samples/sec: 172.46 - lr: 0.000036
609
- 2023-07-26 16:38:24,758 ----------------------------------------------------------------------------------------------------
610
- 2023-07-26 16:38:24,759 EPOCH 18 done: loss 0.2865 - lr 0.000036
611
- 2023-07-26 16:38:27,073 Evaluating as a multi-label problem: False
612
- 2023-07-26 16:38:27,115 DEV : loss 0.18113288283348083 - f1-score (micro avg) 0.9781
613
- 2023-07-26 16:38:27,126 saving best model
614
- 2023-07-26 16:38:30,288 ----------------------------------------------------------------------------------------------------
615
- 2023-07-26 16:39:21,782 epoch 19 - iter 24/243 - loss 0.28138164 - time (sec): 51.49 - samples/sec: 154.23 - lr: 0.000036
616
- 2023-07-26 16:40:12,650 epoch 19 - iter 48/243 - loss 0.28992986 - time (sec): 102.36 - samples/sec: 150.35 - lr: 0.000035
617
- 2023-07-26 16:41:02,164 epoch 19 - iter 72/243 - loss 0.28244605 - time (sec): 151.88 - samples/sec: 152.82 - lr: 0.000035
618
- 2023-07-26 16:41:52,390 epoch 19 - iter 96/243 - loss 0.28642854 - time (sec): 202.10 - samples/sec: 152.66 - lr: 0.000035
619
- 2023-07-26 16:42:44,635 epoch 19 - iter 120/243 - loss 0.28768114 - time (sec): 254.35 - samples/sec: 151.96 - lr: 0.000035
620
- 2023-07-26 16:43:33,907 epoch 19 - iter 144/243 - loss 0.28722806 - time (sec): 303.62 - samples/sec: 153.22 - lr: 0.000035
621
- 2023-07-26 16:44:23,000 epoch 19 - iter 168/243 - loss 0.28477685 - time (sec): 352.71 - samples/sec: 154.35 - lr: 0.000035
622
- 2023-07-26 16:45:11,847 epoch 19 - iter 192/243 - loss 0.28564618 - time (sec): 401.56 - samples/sec: 155.01 - lr: 0.000035
623
- 2023-07-26 16:46:00,662 epoch 19 - iter 216/243 - loss 0.28166734 - time (sec): 450.37 - samples/sec: 155.14 - lr: 0.000035
624
- 2023-07-26 16:46:49,519 epoch 19 - iter 240/243 - loss 0.28044622 - time (sec): 499.23 - samples/sec: 155.64 - lr: 0.000035
625
- 2023-07-26 16:46:55,052 ----------------------------------------------------------------------------------------------------
626
- 2023-07-26 16:46:55,052 EPOCH 19 done: loss 0.2808 - lr 0.000035
627
- 2023-07-26 16:46:56,840 Evaluating as a multi-label problem: False
628
- 2023-07-26 16:46:56,881 DEV : loss 0.2043328434228897 - f1-score (micro avg) 0.9793
629
- 2023-07-26 16:46:56,891 saving best model
630
- 2023-07-26 16:47:00,311 ----------------------------------------------------------------------------------------------------
631
- 2023-07-26 16:47:53,948 epoch 20 - iter 24/243 - loss 0.28666954 - time (sec): 53.64 - samples/sec: 145.20 - lr: 0.000034
632
- 2023-07-26 16:48:47,747 epoch 20 - iter 48/243 - loss 0.29481761 - time (sec): 107.44 - samples/sec: 143.54 - lr: 0.000034
633
- 2023-07-26 16:49:41,712 epoch 20 - iter 72/243 - loss 0.29914317 - time (sec): 161.40 - samples/sec: 143.67 - lr: 0.000034
634
- 2023-07-26 16:50:34,164 epoch 20 - iter 96/243 - loss 0.29393948 - time (sec): 213.85 - samples/sec: 144.15 - lr: 0.000034
635
- 2023-07-26 16:51:26,758 epoch 20 - iter 120/243 - loss 0.29259273 - time (sec): 266.45 - samples/sec: 144.69 - lr: 0.000034
636
- 2023-07-26 16:52:19,496 epoch 20 - iter 144/243 - loss 0.29189521 - time (sec): 319.18 - samples/sec: 145.56 - lr: 0.000034
637
- 2023-07-26 16:53:12,248 epoch 20 - iter 168/243 - loss 0.29174956 - time (sec): 371.94 - samples/sec: 146.27 - lr: 0.000034
638
- 2023-07-26 16:54:04,770 epoch 20 - iter 192/243 - loss 0.28991116 - time (sec): 424.46 - samples/sec: 146.24 - lr: 0.000034
639
- 2023-07-26 16:54:57,220 epoch 20 - iter 216/243 - loss 0.28908421 - time (sec): 476.91 - samples/sec: 146.03 - lr: 0.000034
640
- 2023-07-26 16:55:50,110 epoch 20 - iter 240/243 - loss 0.28802142 - time (sec): 529.80 - samples/sec: 146.82 - lr: 0.000033
641
- 2023-07-26 16:55:56,063 ----------------------------------------------------------------------------------------------------
642
- 2023-07-26 16:55:56,064 EPOCH 20 done: loss 0.2884 - lr 0.000033
643
- 2023-07-26 16:55:58,153 Evaluating as a multi-label problem: False
644
- 2023-07-26 16:55:58,197 DEV : loss 0.17976026237010956 - f1-score (micro avg) 0.9798
645
- 2023-07-26 16:55:58,210 saving best model
646
- 2023-07-26 16:56:01,163 ----------------------------------------------------------------------------------------------------
647
- 2023-07-26 16:56:45,917 epoch 21 - iter 24/243 - loss 0.27074814 - time (sec): 44.75 - samples/sec: 174.64 - lr: 0.000033
648
- 2023-07-26 16:57:30,503 epoch 21 - iter 48/243 - loss 0.27757152 - time (sec): 89.34 - samples/sec: 172.96 - lr: 0.000033
649
- 2023-07-26 16:58:15,097 epoch 21 - iter 72/243 - loss 0.27454337 - time (sec): 133.93 - samples/sec: 173.08 - lr: 0.000033
650
- 2023-07-26 16:58:59,717 epoch 21 - iter 96/243 - loss 0.27609707 - time (sec): 178.55 - samples/sec: 172.80 - lr: 0.000033
651
- 2023-07-26 16:59:44,372 epoch 21 - iter 120/243 - loss 0.27224083 - time (sec): 223.21 - samples/sec: 172.96 - lr: 0.000033
652
- 2023-07-26 17:00:29,083 epoch 21 - iter 144/243 - loss 0.27850149 - time (sec): 267.92 - samples/sec: 172.72 - lr: 0.000033
653
- 2023-07-26 17:01:13,636 epoch 21 - iter 168/243 - loss 0.27696398 - time (sec): 312.47 - samples/sec: 172.79 - lr: 0.000033
654
- 2023-07-26 17:01:58,291 epoch 21 - iter 192/243 - loss 0.27664755 - time (sec): 357.13 - samples/sec: 172.80 - lr: 0.000033
655
- 2023-07-26 17:02:43,178 epoch 21 - iter 216/243 - loss 0.27558848 - time (sec): 402.01 - samples/sec: 173.76 - lr: 0.000032
656
- 2023-07-26 17:03:27,865 epoch 21 - iter 240/243 - loss 0.27583214 - time (sec): 446.70 - samples/sec: 173.99 - lr: 0.000032
657
- 2023-07-26 17:03:32,964 ----------------------------------------------------------------------------------------------------
658
- 2023-07-26 17:03:32,964 EPOCH 21 done: loss 0.2761 - lr 0.000032
659
- 2023-07-26 17:03:34,719 Evaluating as a multi-label problem: False
660
- 2023-07-26 17:03:34,761 DEV : loss 0.20532046258449554 - f1-score (micro avg) 0.9808
661
- 2023-07-26 17:03:34,770 saving best model
662
- 2023-07-26 17:03:38,172 ----------------------------------------------------------------------------------------------------
663
- 2023-07-26 17:04:22,817 epoch 22 - iter 24/243 - loss 0.27909847 - time (sec): 44.64 - samples/sec: 173.08 - lr: 0.000032
664
- 2023-07-26 17:05:07,696 epoch 22 - iter 48/243 - loss 0.27692541 - time (sec): 89.52 - samples/sec: 175.94 - lr: 0.000032
665
- 2023-07-26 17:05:52,516 epoch 22 - iter 72/243 - loss 0.27632545 - time (sec): 134.34 - samples/sec: 175.36 - lr: 0.000032
666
- 2023-07-26 17:06:37,349 epoch 22 - iter 96/243 - loss 0.27607549 - time (sec): 179.18 - samples/sec: 175.31 - lr: 0.000032
667
- 2023-07-26 17:07:22,028 epoch 22 - iter 120/243 - loss 0.27687957 - time (sec): 223.85 - samples/sec: 175.34 - lr: 0.000032
668
- 2023-07-26 17:08:06,628 epoch 22 - iter 144/243 - loss 0.27294774 - time (sec): 268.46 - samples/sec: 174.93 - lr: 0.000032
669
- 2023-07-26 17:08:51,184 epoch 22 - iter 168/243 - loss 0.27391471 - time (sec): 313.01 - samples/sec: 174.15 - lr: 0.000032
670
- 2023-07-26 17:09:35,805 epoch 22 - iter 192/243 - loss 0.27352263 - time (sec): 357.63 - samples/sec: 174.01 - lr: 0.000031
671
- 2023-07-26 17:10:20,566 epoch 22 - iter 216/243 - loss 0.27144978 - time (sec): 402.39 - samples/sec: 174.04 - lr: 0.000031
672
- 2023-07-26 17:11:05,178 epoch 22 - iter 240/243 - loss 0.27338785 - time (sec): 447.01 - samples/sec: 173.85 - lr: 0.000031
673
- 2023-07-26 17:11:10,275 ----------------------------------------------------------------------------------------------------
674
- 2023-07-26 17:11:10,275 EPOCH 22 done: loss 0.2738 - lr 0.000031
675
- 2023-07-26 17:11:12,042 Evaluating as a multi-label problem: False
676
- 2023-07-26 17:11:12,084 DEV : loss 0.20975473523139954 - f1-score (micro avg) 0.9771
677
- 2023-07-26 17:11:12,094 ----------------------------------------------------------------------------------------------------
678
- 2023-07-26 17:11:57,033 epoch 23 - iter 24/243 - loss 0.28534317 - time (sec): 44.94 - samples/sec: 175.57 - lr: 0.000031
679
- 2023-07-26 17:12:41,709 epoch 23 - iter 48/243 - loss 0.28084455 - time (sec): 89.61 - samples/sec: 173.42 - lr: 0.000031
680
- 2023-07-26 17:13:26,426 epoch 23 - iter 72/243 - loss 0.28011749 - time (sec): 134.33 - samples/sec: 173.68 - lr: 0.000031
681
- 2023-07-26 17:14:10,996 epoch 23 - iter 96/243 - loss 0.28443955 - time (sec): 178.90 - samples/sec: 173.25 - lr: 0.000031
682
- 2023-07-26 17:14:55,898 epoch 23 - iter 120/243 - loss 0.28290269 - time (sec): 223.80 - samples/sec: 173.90 - lr: 0.000031
683
- 2023-07-26 17:15:40,508 epoch 23 - iter 144/243 - loss 0.28079246 - time (sec): 268.41 - samples/sec: 173.44 - lr: 0.000031
684
- 2023-07-26 17:16:25,384 epoch 23 - iter 168/243 - loss 0.27982769 - time (sec): 313.29 - samples/sec: 173.93 - lr: 0.000030
685
- 2023-07-26 17:17:10,020 epoch 23 - iter 192/243 - loss 0.27685678 - time (sec): 357.93 - samples/sec: 173.50 - lr: 0.000030
686
- 2023-07-26 17:17:54,847 epoch 23 - iter 216/243 - loss 0.27359946 - time (sec): 402.75 - samples/sec: 173.94 - lr: 0.000030
687
- 2023-07-26 17:18:39,474 epoch 23 - iter 240/243 - loss 0.27378796 - time (sec): 447.38 - samples/sec: 173.62 - lr: 0.000030
688
- 2023-07-26 17:18:44,594 ----------------------------------------------------------------------------------------------------
689
- 2023-07-26 17:18:44,594 EPOCH 23 done: loss 0.2739 - lr 0.000030
690
- 2023-07-26 17:18:46,344 Evaluating as a multi-label problem: False
691
- 2023-07-26 17:18:46,386 DEV : loss 0.21456189453601837 - f1-score (micro avg) 0.9796
692
- 2023-07-26 17:18:46,395 ----------------------------------------------------------------------------------------------------
693
- 2023-07-26 17:19:31,051 epoch 24 - iter 24/243 - loss 0.28123621 - time (sec): 44.66 - samples/sec: 168.56 - lr: 0.000030
694
- 2023-07-26 17:20:15,553 epoch 24 - iter 48/243 - loss 0.27128197 - time (sec): 89.16 - samples/sec: 168.93 - lr: 0.000030
695
- 2023-07-26 17:21:00,218 epoch 24 - iter 72/243 - loss 0.26742573 - time (sec): 133.82 - samples/sec: 169.68 - lr: 0.000030
696
- 2023-07-26 17:21:44,804 epoch 24 - iter 96/243 - loss 0.27426501 - time (sec): 178.41 - samples/sec: 170.21 - lr: 0.000030
697
- 2023-07-26 17:22:29,693 epoch 24 - iter 120/243 - loss 0.26958800 - time (sec): 223.30 - samples/sec: 171.86 - lr: 0.000030
698
- 2023-07-26 17:23:14,736 epoch 24 - iter 144/243 - loss 0.27011544 - time (sec): 268.34 - samples/sec: 174.09 - lr: 0.000029
699
- 2023-07-26 17:23:59,891 epoch 24 - iter 168/243 - loss 0.26573691 - time (sec): 313.50 - samples/sec: 173.54 - lr: 0.000029
700
- 2023-07-26 17:24:44,440 epoch 24 - iter 192/243 - loss 0.26424698 - time (sec): 358.04 - samples/sec: 173.71 - lr: 0.000029
701
- 2023-07-26 17:25:28,792 epoch 24 - iter 216/243 - loss 0.26555746 - time (sec): 402.40 - samples/sec: 173.43 - lr: 0.000029
702
- 2023-07-26 17:26:13,338 epoch 24 - iter 240/243 - loss 0.26918457 - time (sec): 446.94 - samples/sec: 173.77 - lr: 0.000029
703
- 2023-07-26 17:26:18,446 ----------------------------------------------------------------------------------------------------
704
- 2023-07-26 17:26:18,447 EPOCH 24 done: loss 0.2696 - lr 0.000029
705
- 2023-07-26 17:26:20,206 Evaluating as a multi-label problem: False
706
- 2023-07-26 17:26:20,252 DEV : loss 0.21408958733081818 - f1-score (micro avg) 0.9788
707
- 2023-07-26 17:26:20,263 ----------------------------------------------------------------------------------------------------
708
- 2023-07-26 17:27:04,792 epoch 25 - iter 24/243 - loss 0.26057600 - time (sec): 44.53 - samples/sec: 175.66 - lr: 0.000029
709
- 2023-07-26 17:27:49,230 epoch 25 - iter 48/243 - loss 0.25988897 - time (sec): 88.97 - samples/sec: 175.54 - lr: 0.000029
710
- 2023-07-26 17:28:34,576 epoch 25 - iter 72/243 - loss 0.26336622 - time (sec): 134.31 - samples/sec: 174.81 - lr: 0.000029
711
- 2023-07-26 17:29:19,911 epoch 25 - iter 96/243 - loss 0.26126366 - time (sec): 179.65 - samples/sec: 174.70 - lr: 0.000029
712
- 2023-07-26 17:30:05,863 epoch 25 - iter 120/243 - loss 0.26114761 - time (sec): 225.60 - samples/sec: 173.32 - lr: 0.000028
713
- 2023-07-26 17:30:54,836 epoch 25 - iter 144/243 - loss 0.26019042 - time (sec): 274.57 - samples/sec: 170.53 - lr: 0.000028
714
- 2023-07-26 17:31:44,973 epoch 25 - iter 168/243 - loss 0.26060643 - time (sec): 324.71 - samples/sec: 168.00 - lr: 0.000028
715
- 2023-07-26 17:32:34,267 epoch 25 - iter 192/243 - loss 0.26158525 - time (sec): 374.00 - samples/sec: 167.12 - lr: 0.000028
716
- 2023-07-26 17:33:23,148 epoch 25 - iter 216/243 - loss 0.25965178 - time (sec): 422.89 - samples/sec: 165.83 - lr: 0.000028
717
- 2023-07-26 17:34:11,558 epoch 25 - iter 240/243 - loss 0.25991617 - time (sec): 471.29 - samples/sec: 165.08 - lr: 0.000028
718
- 2023-07-26 17:34:17,049 ----------------------------------------------------------------------------------------------------
719
- 2023-07-26 17:34:17,049 EPOCH 25 done: loss 0.2605 - lr 0.000028
720
- 2023-07-26 17:34:18,858 Evaluating as a multi-label problem: False
721
- 2023-07-26 17:34:18,901 DEV : loss 0.20778048038482666 - f1-score (micro avg) 0.9801
722
- 2023-07-26 17:34:18,911 ----------------------------------------------------------------------------------------------------
723
- 2023-07-26 17:35:06,124 epoch 26 - iter 24/243 - loss 0.25028245 - time (sec): 47.21 - samples/sec: 162.56 - lr: 0.000028
724
- 2023-07-26 17:35:56,450 epoch 26 - iter 48/243 - loss 0.26759368 - time (sec): 97.54 - samples/sec: 159.39 - lr: 0.000028
725
- 2023-07-26 17:36:48,254 epoch 26 - iter 72/243 - loss 0.26240750 - time (sec): 149.34 - samples/sec: 155.33 - lr: 0.000028
726
- 2023-07-26 17:37:37,860 epoch 26 - iter 96/243 - loss 0.26499737 - time (sec): 198.95 - samples/sec: 155.95 - lr: 0.000027
727
- 2023-07-26 17:38:26,594 epoch 26 - iter 120/243 - loss 0.26765442 - time (sec): 247.68 - samples/sec: 155.61 - lr: 0.000027
728
- 2023-07-26 17:39:15,951 epoch 26 - iter 144/243 - loss 0.26496660 - time (sec): 297.04 - samples/sec: 155.98 - lr: 0.000027
729
- 2023-07-26 17:40:04,512 epoch 26 - iter 168/243 - loss 0.26407033 - time (sec): 345.60 - samples/sec: 157.09 - lr: 0.000027
730
- 2023-07-26 17:40:52,402 epoch 26 - iter 192/243 - loss 0.26463487 - time (sec): 393.49 - samples/sec: 157.76 - lr: 0.000027
731
- 2023-07-26 17:41:40,356 epoch 26 - iter 216/243 - loss 0.26192074 - time (sec): 441.45 - samples/sec: 158.66 - lr: 0.000027
732
- 2023-07-26 17:42:28,247 epoch 26 - iter 240/243 - loss 0.26299030 - time (sec): 489.34 - samples/sec: 158.86 - lr: 0.000027
733
- 2023-07-26 17:42:33,832 ----------------------------------------------------------------------------------------------------
734
- 2023-07-26 17:42:33,833 EPOCH 26 done: loss 0.2631 - lr 0.000027
735
- 2023-07-26 17:42:35,630 Evaluating as a multi-label problem: False
736
- 2023-07-26 17:42:35,672 DEV : loss 0.22401468455791473 - f1-score (micro avg) 0.9786
737
- 2023-07-26 17:42:35,682 ----------------------------------------------------------------------------------------------------
738
- 2023-07-26 17:43:20,454 epoch 27 - iter 24/243 - loss 0.26639657 - time (sec): 44.77 - samples/sec: 182.81 - lr: 0.000027
739
- 2023-07-26 17:44:04,934 epoch 27 - iter 48/243 - loss 0.27451501 - time (sec): 89.25 - samples/sec: 178.70 - lr: 0.000027
740
- 2023-07-26 17:44:49,425 epoch 27 - iter 72/243 - loss 0.27289399 - time (sec): 133.74 - samples/sec: 176.62 - lr: 0.000026
741
- 2023-07-26 17:45:33,681 epoch 27 - iter 96/243 - loss 0.27091536 - time (sec): 178.00 - samples/sec: 175.50 - lr: 0.000026
742
- 2023-07-26 17:46:18,171 epoch 27 - iter 120/243 - loss 0.27191898 - time (sec): 222.49 - samples/sec: 173.82 - lr: 0.000026
743
- 2023-07-26 17:47:02,640 epoch 27 - iter 144/243 - loss 0.27013358 - time (sec): 266.96 - samples/sec: 173.92 - lr: 0.000026
744
- 2023-07-26 17:47:47,032 epoch 27 - iter 168/243 - loss 0.26766038 - time (sec): 311.35 - samples/sec: 173.58 - lr: 0.000026
745
- 2023-07-26 17:48:33,707 epoch 27 - iter 192/243 - loss 0.26602770 - time (sec): 358.02 - samples/sec: 173.06 - lr: 0.000026
746
- 2023-07-26 17:49:21,690 epoch 27 - iter 216/243 - loss 0.26757355 - time (sec): 406.01 - samples/sec: 171.85 - lr: 0.000026
747
- 2023-07-26 17:50:09,653 epoch 27 - iter 240/243 - loss 0.26544815 - time (sec): 453.97 - samples/sec: 171.19 - lr: 0.000026
748
- 2023-07-26 17:50:15,122 ----------------------------------------------------------------------------------------------------
749
- 2023-07-26 17:50:15,123 EPOCH 27 done: loss 0.2656 - lr 0.000026
750
- 2023-07-26 17:50:17,372 Evaluating as a multi-label problem: False
751
- 2023-07-26 17:50:17,414 DEV : loss 0.2324327975511551 - f1-score (micro avg) 0.9771
752
- 2023-07-26 17:50:17,424 ----------------------------------------------------------------------------------------------------
753
- 2023-07-26 17:51:02,154 epoch 28 - iter 24/243 - loss 0.26044359 - time (sec): 44.73 - samples/sec: 177.24 - lr: 0.000026
754
- 2023-07-26 17:51:46,725 epoch 28 - iter 48/243 - loss 0.25192260 - time (sec): 89.30 - samples/sec: 175.55 - lr: 0.000025
755
- 2023-07-26 17:52:31,357 epoch 28 - iter 72/243 - loss 0.24867911 - time (sec): 133.93 - samples/sec: 175.88 - lr: 0.000025
756
- 2023-07-26 17:53:15,933 epoch 28 - iter 96/243 - loss 0.25204485 - time (sec): 178.51 - samples/sec: 175.73 - lr: 0.000025
757
- 2023-07-26 17:54:00,443 epoch 28 - iter 120/243 - loss 0.24981817 - time (sec): 223.02 - samples/sec: 174.90 - lr: 0.000025
758
- 2023-07-26 17:54:44,958 epoch 28 - iter 144/243 - loss 0.25157168 - time (sec): 267.53 - samples/sec: 174.46 - lr: 0.000025
759
- 2023-07-26 17:55:29,493 epoch 28 - iter 168/243 - loss 0.25440998 - time (sec): 312.07 - samples/sec: 174.04 - lr: 0.000025
760
- 2023-07-26 17:56:13,998 epoch 28 - iter 192/243 - loss 0.25791455 - time (sec): 356.57 - samples/sec: 174.06 - lr: 0.000025
761
- 2023-07-26 17:56:58,663 epoch 28 - iter 216/243 - loss 0.26113615 - time (sec): 401.24 - samples/sec: 173.82 - lr: 0.000025
762
- 2023-07-26 17:57:43,598 epoch 28 - iter 240/243 - loss 0.26254906 - time (sec): 446.17 - samples/sec: 174.40 - lr: 0.000025
763
- 2023-07-26 17:57:48,629 ----------------------------------------------------------------------------------------------------
764
- 2023-07-26 17:57:48,629 EPOCH 28 done: loss 0.2628 - lr 0.000025
765
- 2023-07-26 17:57:50,384 Evaluating as a multi-label problem: False
766
- 2023-07-26 17:57:50,427 DEV : loss 0.21640333533287048 - f1-score (micro avg) 0.9803
767
- 2023-07-26 17:57:50,437 ----------------------------------------------------------------------------------------------------
768
- 2023-07-26 17:58:34,969 epoch 29 - iter 24/243 - loss 0.24833162 - time (sec): 44.53 - samples/sec: 173.47 - lr: 0.000024
769
- 2023-07-26 17:59:19,469 epoch 29 - iter 48/243 - loss 0.25554505 - time (sec): 89.03 - samples/sec: 173.26 - lr: 0.000024
770
- 2023-07-26 18:00:04,033 epoch 29 - iter 72/243 - loss 0.26313723 - time (sec): 133.60 - samples/sec: 173.10 - lr: 0.000024
771
- 2023-07-26 18:00:48,651 epoch 29 - iter 96/243 - loss 0.26456129 - time (sec): 178.21 - samples/sec: 173.90 - lr: 0.000024
772
- 2023-07-26 18:01:33,121 epoch 29 - iter 120/243 - loss 0.26539430 - time (sec): 222.68 - samples/sec: 173.48 - lr: 0.000024
773
- 2023-07-26 18:02:17,661 epoch 29 - iter 144/243 - loss 0.26756174 - time (sec): 267.22 - samples/sec: 173.79 - lr: 0.000024
774
- 2023-07-26 18:03:02,505 epoch 29 - iter 168/243 - loss 0.26309703 - time (sec): 312.07 - samples/sec: 174.46 - lr: 0.000024
775
- 2023-07-26 18:03:46,972 epoch 29 - iter 192/243 - loss 0.26532971 - time (sec): 356.53 - samples/sec: 173.68 - lr: 0.000024
776
- 2023-07-26 18:04:31,621 epoch 29 - iter 216/243 - loss 0.26648227 - time (sec): 401.18 - samples/sec: 173.71 - lr: 0.000024
777
- 2023-07-26 18:05:16,534 epoch 29 - iter 240/243 - loss 0.26528743 - time (sec): 446.10 - samples/sec: 174.44 - lr: 0.000023
778
- 2023-07-26 18:05:21,587 ----------------------------------------------------------------------------------------------------
779
- 2023-07-26 18:05:21,587 EPOCH 29 done: loss 0.2655 - lr 0.000023
780
- 2023-07-26 18:05:23,600 Evaluating as a multi-label problem: False
781
- 2023-07-26 18:05:23,646 DEV : loss 0.24248327314853668 - f1-score (micro avg) 0.9796
782
- 2023-07-26 18:05:23,660 ----------------------------------------------------------------------------------------------------
783
- 2023-07-26 18:06:12,165 epoch 30 - iter 24/243 - loss 0.26154968 - time (sec): 48.51 - samples/sec: 161.26 - lr: 0.000023
784
- 2023-07-26 18:07:02,942 epoch 30 - iter 48/243 - loss 0.27126768 - time (sec): 99.28 - samples/sec: 157.70 - lr: 0.000023
785
- 2023-07-26 18:07:57,979 epoch 30 - iter 72/243 - loss 0.27468039 - time (sec): 154.32 - samples/sec: 150.27 - lr: 0.000023
786
- 2023-07-26 18:08:53,322 epoch 30 - iter 96/243 - loss 0.27662270 - time (sec): 209.66 - samples/sec: 147.69 - lr: 0.000023
787
- 2023-07-26 18:09:48,639 epoch 30 - iter 120/243 - loss 0.27403633 - time (sec): 264.98 - samples/sec: 145.88 - lr: 0.000023
788
- 2023-07-26 18:10:40,050 epoch 30 - iter 144/243 - loss 0.27461637 - time (sec): 316.39 - samples/sec: 146.57 - lr: 0.000023
789
- 2023-07-26 18:11:29,552 epoch 30 - iter 168/243 - loss 0.26994770 - time (sec): 365.89 - samples/sec: 148.67 - lr: 0.000023
790
- 2023-07-26 18:12:18,746 epoch 30 - iter 192/243 - loss 0.26952319 - time (sec): 415.09 - samples/sec: 150.29 - lr: 0.000023
791
- 2023-07-26 18:13:07,757 epoch 30 - iter 216/243 - loss 0.26556592 - time (sec): 464.10 - samples/sec: 151.23 - lr: 0.000022
792
- 2023-07-26 18:13:56,449 epoch 30 - iter 240/243 - loss 0.26521277 - time (sec): 512.79 - samples/sec: 151.74 - lr: 0.000022
793
- 2023-07-26 18:14:01,871 ----------------------------------------------------------------------------------------------------
794
- 2023-07-26 18:14:01,871 EPOCH 30 done: loss 0.2653 - lr 0.000022
795
- 2023-07-26 18:14:03,693 Evaluating as a multi-label problem: False
796
- 2023-07-26 18:14:03,735 DEV : loss 0.23393450677394867 - f1-score (micro avg) 0.9776
797
- 2023-07-26 18:14:03,746 ----------------------------------------------------------------------------------------------------
798
- 2023-07-26 18:14:48,764 epoch 31 - iter 24/243 - loss 0.24073944 - time (sec): 45.02 - samples/sec: 179.77 - lr: 0.000022
799
- 2023-07-26 18:15:33,209 epoch 31 - iter 48/243 - loss 0.24507990 - time (sec): 89.46 - samples/sec: 173.00 - lr: 0.000022
800
- 2023-07-26 18:16:17,809 epoch 31 - iter 72/243 - loss 0.25127541 - time (sec): 134.06 - samples/sec: 173.96 - lr: 0.000022
801
- 2023-07-26 18:17:02,650 epoch 31 - iter 96/243 - loss 0.25526836 - time (sec): 178.90 - samples/sec: 175.19 - lr: 0.000022
802
- 2023-07-26 18:17:47,365 epoch 31 - iter 120/243 - loss 0.25884615 - time (sec): 223.62 - samples/sec: 174.90 - lr: 0.000022
803
- 2023-07-26 18:18:32,093 epoch 31 - iter 144/243 - loss 0.26107421 - time (sec): 268.35 - samples/sec: 174.71 - lr: 0.000022
804
- 2023-07-26 18:19:16,568 epoch 31 - iter 168/243 - loss 0.25772191 - time (sec): 312.82 - samples/sec: 174.07 - lr: 0.000022
805
- 2023-07-26 18:20:01,232 epoch 31 - iter 192/243 - loss 0.25843953 - time (sec): 357.49 - samples/sec: 174.12 - lr: 0.000021
806
- 2023-07-26 18:20:46,098 epoch 31 - iter 216/243 - loss 0.25940033 - time (sec): 402.35 - samples/sec: 174.28 - lr: 0.000021
807
- 2023-07-26 18:21:30,680 epoch 31 - iter 240/243 - loss 0.25924131 - time (sec): 446.93 - samples/sec: 173.95 - lr: 0.000021
808
- 2023-07-26 18:21:35,753 ----------------------------------------------------------------------------------------------------
809
- 2023-07-26 18:21:35,753 EPOCH 31 done: loss 0.2594 - lr 0.000021
810
- 2023-07-26 18:21:37,502 Evaluating as a multi-label problem: False
811
- 2023-07-26 18:21:37,544 DEV : loss 0.22774212062358856 - f1-score (micro avg) 0.9788
812
- 2023-07-26 18:21:37,554 ----------------------------------------------------------------------------------------------------
813
- 2023-07-26 18:22:22,282 epoch 32 - iter 24/243 - loss 0.25476998 - time (sec): 44.73 - samples/sec: 179.17 - lr: 0.000021
814
- 2023-07-26 18:23:07,025 epoch 32 - iter 48/243 - loss 0.25629909 - time (sec): 89.47 - samples/sec: 178.31 - lr: 0.000021
815
- 2023-07-26 18:23:51,761 epoch 32 - iter 72/243 - loss 0.25739595 - time (sec): 134.21 - samples/sec: 177.13 - lr: 0.000021
816
- 2023-07-26 18:24:36,312 epoch 32 - iter 96/243 - loss 0.26207122 - time (sec): 178.76 - samples/sec: 175.24 - lr: 0.000021
817
- 2023-07-26 18:25:20,955 epoch 32 - iter 120/243 - loss 0.26238445 - time (sec): 223.40 - samples/sec: 175.45 - lr: 0.000021
818
- 2023-07-26 18:26:05,680 epoch 32 - iter 144/243 - loss 0.26421827 - time (sec): 268.13 - samples/sec: 174.45 - lr: 0.000021
819
- 2023-07-26 18:26:50,600 epoch 32 - iter 168/243 - loss 0.26554256 - time (sec): 313.05 - samples/sec: 175.05 - lr: 0.000020
820
- 2023-07-26 18:27:37,550 epoch 32 - iter 192/243 - loss 0.26682748 - time (sec): 360.00 - samples/sec: 173.67 - lr: 0.000020
821
- 2023-07-26 18:28:26,938 epoch 32 - iter 216/243 - loss 0.26495455 - time (sec): 409.38 - samples/sec: 172.06 - lr: 0.000020
822
- 2023-07-26 18:29:15,763 epoch 32 - iter 240/243 - loss 0.26526827 - time (sec): 458.21 - samples/sec: 169.70 - lr: 0.000020
823
- 2023-07-26 18:29:21,316 ----------------------------------------------------------------------------------------------------
824
- 2023-07-26 18:29:21,316 EPOCH 32 done: loss 0.2646 - lr 0.000020
825
- 2023-07-26 18:29:23,143 Evaluating as a multi-label problem: False
826
- 2023-07-26 18:29:23,187 DEV : loss 0.22920973598957062 - f1-score (micro avg) 0.9793
827
- 2023-07-26 18:29:23,197 ----------------------------------------------------------------------------------------------------
828
- 2023-07-26 18:30:10,600 epoch 33 - iter 24/243 - loss 0.26866868 - time (sec): 47.40 - samples/sec: 165.35 - lr: 0.000020
829
- 2023-07-26 18:30:58,341 epoch 33 - iter 48/243 - loss 0.25914800 - time (sec): 95.14 - samples/sec: 161.12 - lr: 0.000020
830
- 2023-07-26 18:31:46,238 epoch 33 - iter 72/243 - loss 0.25631313 - time (sec): 143.04 - samples/sec: 161.25 - lr: 0.000020
831
- 2023-07-26 18:32:38,739 epoch 33 - iter 96/243 - loss 0.25455371 - time (sec): 195.54 - samples/sec: 158.90 - lr: 0.000020
832
- 2023-07-26 18:33:26,705 epoch 33 - iter 120/243 - loss 0.25585405 - time (sec): 243.51 - samples/sec: 159.48 - lr: 0.000020
833
- 2023-07-26 18:34:14,895 epoch 33 - iter 144/243 - loss 0.25945055 - time (sec): 291.70 - samples/sec: 159.74 - lr: 0.000019
834
- 2023-07-26 18:35:02,659 epoch 33 - iter 168/243 - loss 0.25932428 - time (sec): 339.46 - samples/sec: 159.76 - lr: 0.000019
835
- 2023-07-26 18:35:50,532 epoch 33 - iter 192/243 - loss 0.25851724 - time (sec): 387.33 - samples/sec: 160.31 - lr: 0.000019
836
- 2023-07-26 18:36:38,327 epoch 33 - iter 216/243 - loss 0.25678080 - time (sec): 435.13 - samples/sec: 160.50 - lr: 0.000019
837
- 2023-07-26 18:37:26,262 epoch 33 - iter 240/243 - loss 0.25562158 - time (sec): 483.06 - samples/sec: 160.86 - lr: 0.000019
838
- 2023-07-26 18:37:31,732 ----------------------------------------------------------------------------------------------------
839
- 2023-07-26 18:37:31,732 EPOCH 33 done: loss 0.2552 - lr 0.000019
840
- 2023-07-26 18:37:33,524 Evaluating as a multi-label problem: False
841
- 2023-07-26 18:37:33,566 DEV : loss 0.23627179861068726 - f1-score (micro avg) 0.9791
842
- 2023-07-26 18:37:33,576 ----------------------------------------------------------------------------------------------------
843
- 2023-07-26 18:38:18,104 epoch 34 - iter 24/243 - loss 0.27182899 - time (sec): 44.53 - samples/sec: 177.66 - lr: 0.000019
844
- 2023-07-26 18:39:02,789 epoch 34 - iter 48/243 - loss 0.27027922 - time (sec): 89.21 - samples/sec: 177.42 - lr: 0.000019
845
- 2023-07-26 18:39:47,401 epoch 34 - iter 72/243 - loss 0.26451951 - time (sec): 133.83 - samples/sec: 176.71 - lr: 0.000019
846
- 2023-07-26 18:40:31,661 epoch 34 - iter 96/243 - loss 0.26736759 - time (sec): 178.09 - samples/sec: 174.31 - lr: 0.000019
847
- 2023-07-26 18:41:16,196 epoch 34 - iter 120/243 - loss 0.26439071 - time (sec): 222.62 - samples/sec: 174.62 - lr: 0.000018
848
- 2023-07-26 18:42:00,770 epoch 34 - iter 144/243 - loss 0.26033732 - time (sec): 267.19 - samples/sec: 174.48 - lr: 0.000018
849
- 2023-07-26 18:42:45,441 epoch 34 - iter 168/243 - loss 0.25756053 - time (sec): 311.87 - samples/sec: 174.19 - lr: 0.000018
850
- 2023-07-26 18:43:30,194 epoch 34 - iter 192/243 - loss 0.26053780 - time (sec): 356.62 - samples/sec: 174.51 - lr: 0.000018
851
- 2023-07-26 18:44:14,725 epoch 34 - iter 216/243 - loss 0.26079037 - time (sec): 401.15 - samples/sec: 174.64 - lr: 0.000018
852
- 2023-07-26 18:44:59,292 epoch 34 - iter 240/243 - loss 0.25971768 - time (sec): 445.72 - samples/sec: 174.39 - lr: 0.000018
853
- 2023-07-26 18:45:04,380 ----------------------------------------------------------------------------------------------------
854
- 2023-07-26 18:45:04,380 EPOCH 34 done: loss 0.2595 - lr 0.000018
855
- 2023-07-26 18:45:06,131 Evaluating as a multi-label problem: False
856
- 2023-07-26 18:45:06,173 DEV : loss 0.23955273628234863 - f1-score (micro avg) 0.9796
857
- 2023-07-26 18:45:06,183 ----------------------------------------------------------------------------------------------------
858
- 2023-07-26 18:45:50,882 epoch 35 - iter 24/243 - loss 0.26701266 - time (sec): 44.70 - samples/sec: 178.66 - lr: 0.000018
859
- 2023-07-26 18:46:35,519 epoch 35 - iter 48/243 - loss 0.25211759 - time (sec): 89.34 - samples/sec: 175.67 - lr: 0.000018
860
- 2023-07-26 18:47:20,251 epoch 35 - iter 72/243 - loss 0.25876122 - time (sec): 134.07 - samples/sec: 175.92 - lr: 0.000018
861
- 2023-07-26 18:48:04,922 epoch 35 - iter 96/243 - loss 0.25751966 - time (sec): 178.74 - samples/sec: 175.77 - lr: 0.000017
862
- 2023-07-26 18:48:49,416 epoch 35 - iter 120/243 - loss 0.25782676 - time (sec): 223.23 - samples/sec: 174.59 - lr: 0.000017
863
- 2023-07-26 18:49:34,049 epoch 35 - iter 144/243 - loss 0.26020302 - time (sec): 267.87 - samples/sec: 174.72 - lr: 0.000017
864
- 2023-07-26 18:50:18,677 epoch 35 - iter 168/243 - loss 0.26431905 - time (sec): 312.49 - samples/sec: 175.29 - lr: 0.000017
865
- 2023-07-26 18:51:03,300 epoch 35 - iter 192/243 - loss 0.26060801 - time (sec): 357.12 - samples/sec: 175.10 - lr: 0.000017
866
- 2023-07-26 18:51:47,857 epoch 35 - iter 216/243 - loss 0.26100924 - time (sec): 401.67 - samples/sec: 174.60 - lr: 0.000017
867
- 2023-07-26 18:52:32,385 epoch 35 - iter 240/243 - loss 0.26071736 - time (sec): 446.20 - samples/sec: 174.23 - lr: 0.000017
868
- 2023-07-26 18:52:37,453 ----------------------------------------------------------------------------------------------------
869
- 2023-07-26 18:52:37,454 EPOCH 35 done: loss 0.2611 - lr 0.000017
870
- 2023-07-26 18:52:39,658 Evaluating as a multi-label problem: False
871
- 2023-07-26 18:52:39,699 DEV : loss 0.24450713396072388 - f1-score (micro avg) 0.9791
872
- 2023-07-26 18:52:39,709 ----------------------------------------------------------------------------------------------------
873
- 2023-07-26 18:53:24,264 epoch 36 - iter 24/243 - loss 0.27084705 - time (sec): 44.55 - samples/sec: 175.18 - lr: 0.000017
874
- 2023-07-26 18:54:08,663 epoch 36 - iter 48/243 - loss 0.25947400 - time (sec): 88.95 - samples/sec: 173.11 - lr: 0.000017
875
- 2023-07-26 18:54:53,501 epoch 36 - iter 72/243 - loss 0.25687195 - time (sec): 133.79 - samples/sec: 175.35 - lr: 0.000016
876
- 2023-07-26 18:55:37,893 epoch 36 - iter 96/243 - loss 0.25424198 - time (sec): 178.18 - samples/sec: 173.93 - lr: 0.000016
877
- 2023-07-26 18:56:22,286 epoch 36 - iter 120/243 - loss 0.25557169 - time (sec): 222.58 - samples/sec: 173.34 - lr: 0.000016
878
- 2023-07-26 18:57:15,000 epoch 36 - iter 144/243 - loss 0.25787383 - time (sec): 275.29 - samples/sec: 168.90 - lr: 0.000016
879
- 2023-07-26 18:58:08,183 epoch 36 - iter 168/243 - loss 0.25642415 - time (sec): 328.47 - samples/sec: 165.37 - lr: 0.000016
880
- 2023-07-26 18:59:02,250 epoch 36 - iter 192/243 - loss 0.25543523 - time (sec): 382.54 - samples/sec: 162.77 - lr: 0.000016
881
- 2023-07-26 18:59:53,084 epoch 36 - iter 216/243 - loss 0.25443060 - time (sec): 433.38 - samples/sec: 161.58 - lr: 0.000016
882
- 2023-07-26 19:00:41,508 epoch 36 - iter 240/243 - loss 0.25344304 - time (sec): 481.80 - samples/sec: 161.25 - lr: 0.000016
883
- 2023-07-26 19:00:47,029 ----------------------------------------------------------------------------------------------------
884
- 2023-07-26 19:00:47,029 EPOCH 36 done: loss 0.2536 - lr 0.000016
885
- 2023-07-26 19:00:48,817 Evaluating as a multi-label problem: False
886
- 2023-07-26 19:00:48,859 DEV : loss 0.2530966103076935 - f1-score (micro avg) 0.9788
887
- 2023-07-26 19:00:48,869 ----------------------------------------------------------------------------------------------------
888
- 2023-07-26 19:01:33,809 epoch 37 - iter 24/243 - loss 0.27190881 - time (sec): 44.94 - samples/sec: 183.74 - lr: 0.000016
889
- 2023-07-26 19:02:18,402 epoch 37 - iter 48/243 - loss 0.26681536 - time (sec): 89.53 - samples/sec: 178.86 - lr: 0.000015
890
- 2023-07-26 19:03:03,153 epoch 37 - iter 72/243 - loss 0.26204165 - time (sec): 134.28 - samples/sec: 177.43 - lr: 0.000015
891
- 2023-07-26 19:03:47,816 epoch 37 - iter 96/243 - loss 0.25844813 - time (sec): 178.95 - samples/sec: 176.20 - lr: 0.000015
892
- 2023-07-26 19:04:32,391 epoch 37 - iter 120/243 - loss 0.25889938 - time (sec): 223.52 - samples/sec: 174.82 - lr: 0.000015
893
- 2023-07-26 19:05:17,029 epoch 37 - iter 144/243 - loss 0.26222809 - time (sec): 268.16 - samples/sec: 175.18 - lr: 0.000015
894
- 2023-07-26 19:06:01,650 epoch 37 - iter 168/243 - loss 0.26407155 - time (sec): 312.78 - samples/sec: 174.91 - lr: 0.000015
895
- 2023-07-26 19:06:46,300 epoch 37 - iter 192/243 - loss 0.26361155 - time (sec): 357.43 - samples/sec: 174.83 - lr: 0.000015
896
- 2023-07-26 19:07:31,061 epoch 37 - iter 216/243 - loss 0.26668156 - time (sec): 402.19 - samples/sec: 174.78 - lr: 0.000015
897
- 2023-07-26 19:08:15,436 epoch 37 - iter 240/243 - loss 0.26504239 - time (sec): 446.57 - samples/sec: 174.15 - lr: 0.000015
898
- 2023-07-26 19:08:20,495 ----------------------------------------------------------------------------------------------------
899
- 2023-07-26 19:08:20,495 EPOCH 37 done: loss 0.2650 - lr 0.000015
900
- 2023-07-26 19:08:22,330 Evaluating as a multi-label problem: False
901
- 2023-07-26 19:08:22,374 DEV : loss 0.2624962031841278 - f1-score (micro avg) 0.9781
902
- 2023-07-26 19:08:22,384 ----------------------------------------------------------------------------------------------------
903
- 2023-07-26 19:09:06,629 epoch 38 - iter 24/243 - loss 0.26162759 - time (sec): 44.24 - samples/sec: 174.37 - lr: 0.000014
904
- 2023-07-26 19:09:51,176 epoch 38 - iter 48/243 - loss 0.26085357 - time (sec): 88.79 - samples/sec: 175.87 - lr: 0.000014
905
- 2023-07-26 19:10:35,702 epoch 38 - iter 72/243 - loss 0.25308808 - time (sec): 133.32 - samples/sec: 176.61 - lr: 0.000014
906
- 2023-07-26 19:11:19,948 epoch 38 - iter 96/243 - loss 0.25632516 - time (sec): 177.56 - samples/sec: 175.93 - lr: 0.000014
907
- 2023-07-26 19:12:04,580 epoch 38 - iter 120/243 - loss 0.25358337 - time (sec): 222.20 - samples/sec: 176.70 - lr: 0.000014
908
- 2023-07-26 19:12:48,992 epoch 38 - iter 144/243 - loss 0.25557088 - time (sec): 266.61 - samples/sec: 176.51 - lr: 0.000014
909
- 2023-07-26 19:13:33,435 epoch 38 - iter 168/243 - loss 0.25407854 - time (sec): 311.05 - samples/sec: 176.83 - lr: 0.000014
910
- 2023-07-26 19:14:17,541 epoch 38 - iter 192/243 - loss 0.25597339 - time (sec): 355.16 - samples/sec: 176.02 - lr: 0.000014
911
- 2023-07-26 19:15:01,826 epoch 38 - iter 216/243 - loss 0.25532730 - time (sec): 399.44 - samples/sec: 175.68 - lr: 0.000014
912
- 2023-07-26 19:15:45,905 epoch 38 - iter 240/243 - loss 0.25415245 - time (sec): 443.52 - samples/sec: 175.02 - lr: 0.000013
913
- 2023-07-26 19:15:51,052 ----------------------------------------------------------------------------------------------------
914
- 2023-07-26 19:15:51,053 EPOCH 38 done: loss 0.2542 - lr 0.000013
915
- 2023-07-26 19:15:52,801 Evaluating as a multi-label problem: False
916
- 2023-07-26 19:15:52,845 DEV : loss 0.24244999885559082 - f1-score (micro avg) 0.9788
917
- 2023-07-26 19:15:52,855 ----------------------------------------------------------------------------------------------------
918
- 2023-07-26 19:16:37,293 epoch 39 - iter 24/243 - loss 0.25336484 - time (sec): 44.44 - samples/sec: 176.50 - lr: 0.000013
919
- 2023-07-26 19:17:21,644 epoch 39 - iter 48/243 - loss 0.25897743 - time (sec): 88.79 - samples/sec: 177.38 - lr: 0.000013
920
- 2023-07-26 19:18:05,772 epoch 39 - iter 72/243 - loss 0.25769549 - time (sec): 132.92 - samples/sec: 175.31 - lr: 0.000013
921
- 2023-07-26 19:18:50,169 epoch 39 - iter 96/243 - loss 0.25751150 - time (sec): 177.31 - samples/sec: 175.93 - lr: 0.000013
922
- 2023-07-26 19:19:34,381 epoch 39 - iter 120/243 - loss 0.25315782 - time (sec): 221.53 - samples/sec: 175.24 - lr: 0.000013
923
- 2023-07-26 19:20:18,559 epoch 39 - iter 144/243 - loss 0.25233489 - time (sec): 265.70 - samples/sec: 174.74 - lr: 0.000013
924
- 2023-07-26 19:21:03,145 epoch 39 - iter 168/243 - loss 0.25114668 - time (sec): 310.29 - samples/sec: 174.33 - lr: 0.000013
925
- 2023-07-26 19:21:47,854 epoch 39 - iter 192/243 - loss 0.25185953 - time (sec): 355.00 - samples/sec: 174.12 - lr: 0.000013
926
- 2023-07-26 19:22:32,507 epoch 39 - iter 216/243 - loss 0.25746349 - time (sec): 399.65 - samples/sec: 174.90 - lr: 0.000012
927
- 2023-07-26 19:23:16,796 epoch 39 - iter 240/243 - loss 0.25680252 - time (sec): 443.94 - samples/sec: 174.98 - lr: 0.000012
928
- 2023-07-26 19:23:21,907 ----------------------------------------------------------------------------------------------------
929
- 2023-07-26 19:23:21,908 EPOCH 39 done: loss 0.2579 - lr 0.000012
930
- 2023-07-26 19:23:23,678 Evaluating as a multi-label problem: False
931
- 2023-07-26 19:23:23,719 DEV : loss 0.24615894258022308 - f1-score (micro avg) 0.9798
932
- 2023-07-26 19:23:23,729 ----------------------------------------------------------------------------------------------------
933
- 2023-07-26 19:24:08,073 epoch 40 - iter 24/243 - loss 0.24837758 - time (sec): 44.34 - samples/sec: 175.24 - lr: 0.000012
934
- 2023-07-26 19:24:52,448 epoch 40 - iter 48/243 - loss 0.24725040 - time (sec): 88.72 - samples/sec: 176.39 - lr: 0.000012
935
- 2023-07-26 19:25:37,011 epoch 40 - iter 72/243 - loss 0.25023824 - time (sec): 133.28 - samples/sec: 176.92 - lr: 0.000012
936
- 2023-07-26 19:26:21,296 epoch 40 - iter 96/243 - loss 0.24239002 - time (sec): 177.57 - samples/sec: 176.32 - lr: 0.000012
937
- 2023-07-26 19:27:05,481 epoch 40 - iter 120/243 - loss 0.24524267 - time (sec): 221.75 - samples/sec: 175.34 - lr: 0.000012
938
- 2023-07-26 19:27:49,791 epoch 40 - iter 144/243 - loss 0.24784591 - time (sec): 266.06 - samples/sec: 175.50 - lr: 0.000012
939
- 2023-07-26 19:28:34,155 epoch 40 - iter 168/243 - loss 0.24872740 - time (sec): 310.43 - samples/sec: 174.67 - lr: 0.000012
940
- 2023-07-26 19:29:18,697 epoch 40 - iter 192/243 - loss 0.25012412 - time (sec): 354.97 - samples/sec: 174.67 - lr: 0.000011
941
- 2023-07-26 19:30:03,191 epoch 40 - iter 216/243 - loss 0.25345259 - time (sec): 399.46 - samples/sec: 174.99 - lr: 0.000011
942
- 2023-07-26 19:30:47,560 epoch 40 - iter 240/243 - loss 0.25383699 - time (sec): 443.83 - samples/sec: 174.98 - lr: 0.000011
943
- 2023-07-26 19:30:52,654 ----------------------------------------------------------------------------------------------------
944
- 2023-07-26 19:30:52,655 EPOCH 40 done: loss 0.2540 - lr 0.000011
945
- 2023-07-26 19:30:54,396 Evaluating as a multi-label problem: False
946
- 2023-07-26 19:30:54,438 DEV : loss 0.2575598359107971 - f1-score (micro avg) 0.9791
947
- 2023-07-26 19:30:54,447 ----------------------------------------------------------------------------------------------------
948
- 2023-07-26 19:31:38,768 epoch 41 - iter 24/243 - loss 0.24306327 - time (sec): 44.32 - samples/sec: 175.47 - lr: 0.000011
949
- 2023-07-26 19:32:23,092 epoch 41 - iter 48/243 - loss 0.24156726 - time (sec): 88.64 - samples/sec: 175.50 - lr: 0.000011
950
- 2023-07-26 19:33:07,508 epoch 41 - iter 72/243 - loss 0.24869032 - time (sec): 133.06 - samples/sec: 177.14 - lr: 0.000011
951
- 2023-07-26 19:33:51,664 epoch 41 - iter 96/243 - loss 0.25072177 - time (sec): 177.22 - samples/sec: 175.38 - lr: 0.000011
952
- 2023-07-26 19:34:35,771 epoch 41 - iter 120/243 - loss 0.25396376 - time (sec): 221.32 - samples/sec: 174.35 - lr: 0.000011
953
- 2023-07-26 19:35:20,071 epoch 41 - iter 144/243 - loss 0.25095812 - time (sec): 265.62 - samples/sec: 174.83 - lr: 0.000011
954
- 2023-07-26 19:36:04,548 epoch 41 - iter 168/243 - loss 0.24810464 - time (sec): 310.10 - samples/sec: 175.56 - lr: 0.000010
955
- 2023-07-26 19:36:48,812 epoch 41 - iter 192/243 - loss 0.24879453 - time (sec): 354.36 - samples/sec: 175.37 - lr: 0.000010
956
- 2023-07-26 19:37:33,241 epoch 41 - iter 216/243 - loss 0.25177431 - time (sec): 398.79 - samples/sec: 175.79 - lr: 0.000010
957
- 2023-07-26 19:38:17,405 epoch 41 - iter 240/243 - loss 0.25152758 - time (sec): 442.96 - samples/sec: 175.50 - lr: 0.000010
958
- 2023-07-26 19:38:22,468 ----------------------------------------------------------------------------------------------------
959
- 2023-07-26 19:38:22,469 EPOCH 41 done: loss 0.2509 - lr 0.000010
960
- 2023-07-26 19:38:24,215 Evaluating as a multi-label problem: False
961
- 2023-07-26 19:38:24,257 DEV : loss 0.25127604603767395 - f1-score (micro avg) 0.9786
962
- 2023-07-26 19:38:24,267 ----------------------------------------------------------------------------------------------------
963
- 2023-07-26 19:39:08,271 epoch 42 - iter 24/243 - loss 0.25413425 - time (sec): 44.00 - samples/sec: 167.14 - lr: 0.000010
964
- 2023-07-26 19:39:52,711 epoch 42 - iter 48/243 - loss 0.25771203 - time (sec): 88.44 - samples/sec: 173.59 - lr: 0.000010
965
- 2023-07-26 19:40:37,013 epoch 42 - iter 72/243 - loss 0.25402986 - time (sec): 132.75 - samples/sec: 174.07 - lr: 0.000010
966
- 2023-07-26 19:41:21,464 epoch 42 - iter 96/243 - loss 0.25689370 - time (sec): 177.20 - samples/sec: 175.64 - lr: 0.000010
967
- 2023-07-26 19:42:05,507 epoch 42 - iter 120/243 - loss 0.25635789 - time (sec): 221.24 - samples/sec: 174.08 - lr: 0.000010
968
- 2023-07-26 19:42:49,881 epoch 42 - iter 144/243 - loss 0.25641142 - time (sec): 265.61 - samples/sec: 174.68 - lr: 0.000009
969
- 2023-07-26 19:43:34,200 epoch 42 - iter 168/243 - loss 0.25676110 - time (sec): 309.93 - samples/sec: 175.15 - lr: 0.000009
970
- 2023-07-26 19:44:18,472 epoch 42 - iter 192/243 - loss 0.25789268 - time (sec): 354.20 - samples/sec: 175.15 - lr: 0.000009
971
- 2023-07-26 19:45:02,833 epoch 42 - iter 216/243 - loss 0.25889165 - time (sec): 398.57 - samples/sec: 175.63 - lr: 0.000009
972
- 2023-07-26 19:45:47,116 epoch 42 - iter 240/243 - loss 0.25885055 - time (sec): 442.85 - samples/sec: 175.64 - lr: 0.000009
973
- 2023-07-26 19:45:52,133 ----------------------------------------------------------------------------------------------------
974
- 2023-07-26 19:45:52,133 EPOCH 42 done: loss 0.2584 - lr 0.000009
975
- 2023-07-26 19:45:54,001 Evaluating as a multi-label problem: False
976
- 2023-07-26 19:45:54,045 DEV : loss 0.2509002983570099 - f1-score (micro avg) 0.9776
977
- 2023-07-26 19:45:54,056 ----------------------------------------------------------------------------------------------------
978
- 2023-07-26 19:46:38,776 epoch 43 - iter 24/243 - loss 0.25656669 - time (sec): 44.72 - samples/sec: 175.88 - lr: 0.000009
979
- 2023-07-26 19:47:23,646 epoch 43 - iter 48/243 - loss 0.25713909 - time (sec): 89.59 - samples/sec: 179.17 - lr: 0.000009
980
- 2023-07-26 19:48:08,109 epoch 43 - iter 72/243 - loss 0.25209780 - time (sec): 134.05 - samples/sec: 176.45 - lr: 0.000009
981
- 2023-07-26 19:48:52,698 epoch 43 - iter 96/243 - loss 0.24509857 - time (sec): 178.64 - samples/sec: 175.75 - lr: 0.000009
982
- 2023-07-26 19:49:37,182 epoch 43 - iter 120/243 - loss 0.25000579 - time (sec): 223.13 - samples/sec: 174.94 - lr: 0.000008
983
- 2023-07-26 19:50:21,736 epoch 43 - iter 144/243 - loss 0.25295949 - time (sec): 267.68 - samples/sec: 175.08 - lr: 0.000008
984
- 2023-07-26 19:51:06,420 epoch 43 - iter 168/243 - loss 0.25493036 - time (sec): 312.36 - samples/sec: 175.74 - lr: 0.000008
985
- 2023-07-26 19:51:50,778 epoch 43 - iter 192/243 - loss 0.25313033 - time (sec): 356.72 - samples/sec: 174.71 - lr: 0.000008
986
- 2023-07-26 19:52:35,121 epoch 43 - iter 216/243 - loss 0.25255837 - time (sec): 401.06 - samples/sec: 174.20 - lr: 0.000008
987
- 2023-07-26 19:53:19,699 epoch 43 - iter 240/243 - loss 0.25326105 - time (sec): 445.64 - samples/sec: 174.52 - lr: 0.000008
988
- 2023-07-26 19:53:24,805 ----------------------------------------------------------------------------------------------------
989
- 2023-07-26 19:53:24,805 EPOCH 43 done: loss 0.2536 - lr 0.000008
990
- 2023-07-26 19:53:27,059 Evaluating as a multi-label problem: False
991
- 2023-07-26 19:53:27,103 DEV : loss 0.25337928533554077 - f1-score (micro avg) 0.9784
992
- 2023-07-26 19:53:27,114 ----------------------------------------------------------------------------------------------------
993
- 2023-07-26 19:54:11,472 epoch 44 - iter 24/243 - loss 0.22752064 - time (sec): 44.36 - samples/sec: 169.73 - lr: 0.000008
994
- 2023-07-26 19:54:55,919 epoch 44 - iter 48/243 - loss 0.23951614 - time (sec): 88.80 - samples/sec: 171.20 - lr: 0.000008
995
- 2023-07-26 19:55:40,452 epoch 44 - iter 72/243 - loss 0.23986022 - time (sec): 133.34 - samples/sec: 171.97 - lr: 0.000008
996
- 2023-07-26 19:56:25,023 epoch 44 - iter 96/243 - loss 0.24528781 - time (sec): 177.91 - samples/sec: 173.40 - lr: 0.000007
997
- 2023-07-26 19:57:09,511 epoch 44 - iter 120/243 - loss 0.24572088 - time (sec): 222.40 - samples/sec: 173.40 - lr: 0.000007
998
- 2023-07-26 19:57:54,163 epoch 44 - iter 144/243 - loss 0.24464183 - time (sec): 267.05 - samples/sec: 173.04 - lr: 0.000007
999
- 2023-07-26 19:58:39,149 epoch 44 - iter 168/243 - loss 0.24523592 - time (sec): 312.04 - samples/sec: 173.72 - lr: 0.000007
1000
- 2023-07-26 19:59:23,881 epoch 44 - iter 192/243 - loss 0.24519757 - time (sec): 356.77 - samples/sec: 173.61 - lr: 0.000007
1001
- 2023-07-26 20:00:08,665 epoch 44 - iter 216/243 - loss 0.24456227 - time (sec): 401.55 - samples/sec: 173.99 - lr: 0.000007
1002
- 2023-07-26 20:00:53,278 epoch 44 - iter 240/243 - loss 0.24582873 - time (sec): 446.16 - samples/sec: 174.08 - lr: 0.000007
1003
- 2023-07-26 20:00:58,393 ----------------------------------------------------------------------------------------------------
1004
- 2023-07-26 20:00:58,393 EPOCH 44 done: loss 0.2462 - lr 0.000007
1005
- 2023-07-26 20:01:00,158 Evaluating as a multi-label problem: False
1006
- 2023-07-26 20:01:00,200 DEV : loss 0.25915977358818054 - f1-score (micro avg) 0.9784
1007
- 2023-07-26 20:01:00,210 ----------------------------------------------------------------------------------------------------
1008
- 2023-07-26 20:01:44,820 epoch 45 - iter 24/243 - loss 0.26201019 - time (sec): 44.61 - samples/sec: 176.98 - lr: 0.000007
1009
- 2023-07-26 20:02:29,464 epoch 45 - iter 48/243 - loss 0.24779270 - time (sec): 89.25 - samples/sec: 174.35 - lr: 0.000007
1010
- 2023-07-26 20:03:13,973 epoch 45 - iter 72/243 - loss 0.25012887 - time (sec): 133.76 - samples/sec: 174.72 - lr: 0.000006
1011
- 2023-07-26 20:03:58,625 epoch 45 - iter 96/243 - loss 0.25289868 - time (sec): 178.41 - samples/sec: 174.60 - lr: 0.000006
1012
- 2023-07-26 20:04:43,139 epoch 45 - iter 120/243 - loss 0.25326284 - time (sec): 222.93 - samples/sec: 174.12 - lr: 0.000006
1013
- 2023-07-26 20:05:27,809 epoch 45 - iter 144/243 - loss 0.25373868 - time (sec): 267.60 - samples/sec: 174.76 - lr: 0.000006
1014
- 2023-07-26 20:06:12,288 epoch 45 - iter 168/243 - loss 0.25215421 - time (sec): 312.08 - samples/sec: 174.53 - lr: 0.000006
1015
- 2023-07-26 20:06:56,723 epoch 45 - iter 192/243 - loss 0.25175489 - time (sec): 356.51 - samples/sec: 174.02 - lr: 0.000006
1016
- 2023-07-26 20:07:41,287 epoch 45 - iter 216/243 - loss 0.24952171 - time (sec): 401.08 - samples/sec: 174.05 - lr: 0.000006
1017
- 2023-07-26 20:08:25,996 epoch 45 - iter 240/243 - loss 0.25004168 - time (sec): 445.79 - samples/sec: 174.41 - lr: 0.000006
1018
- 2023-07-26 20:08:31,078 ----------------------------------------------------------------------------------------------------
1019
- 2023-07-26 20:08:31,079 EPOCH 45 done: loss 0.2503 - lr 0.000006
1020
- 2023-07-26 20:08:32,834 Evaluating as a multi-label problem: False
1021
- 2023-07-26 20:08:32,877 DEV : loss 0.2550533413887024 - f1-score (micro avg) 0.9788
1022
- 2023-07-26 20:08:32,887 ----------------------------------------------------------------------------------------------------
1023
- 2023-07-26 20:09:17,479 epoch 46 - iter 24/243 - loss 0.24479678 - time (sec): 44.59 - samples/sec: 177.79 - lr: 0.000006
1024
- 2023-07-26 20:10:02,067 epoch 46 - iter 48/243 - loss 0.24138586 - time (sec): 89.18 - samples/sec: 175.65 - lr: 0.000005
1025
- 2023-07-26 20:10:46,638 epoch 46 - iter 72/243 - loss 0.24404064 - time (sec): 133.75 - samples/sec: 175.18 - lr: 0.000005
1026
- 2023-07-26 20:11:31,127 epoch 46 - iter 96/243 - loss 0.24604064 - time (sec): 178.24 - samples/sec: 174.01 - lr: 0.000005
1027
- 2023-07-26 20:12:15,792 epoch 46 - iter 120/243 - loss 0.24783294 - time (sec): 222.91 - samples/sec: 174.51 - lr: 0.000005
1028
- 2023-07-26 20:13:00,505 epoch 46 - iter 144/243 - loss 0.24973562 - time (sec): 267.62 - samples/sec: 174.34 - lr: 0.000005
1029
- 2023-07-26 20:13:45,181 epoch 46 - iter 168/243 - loss 0.24967162 - time (sec): 312.29 - samples/sec: 173.97 - lr: 0.000005
1030
- 2023-07-26 20:14:30,156 epoch 46 - iter 192/243 - loss 0.25131667 - time (sec): 357.27 - samples/sec: 173.94 - lr: 0.000005
1031
- 2023-07-26 20:15:14,977 epoch 46 - iter 216/243 - loss 0.25004815 - time (sec): 402.09 - samples/sec: 174.06 - lr: 0.000005
1032
- 2023-07-26 20:15:59,586 epoch 46 - iter 240/243 - loss 0.24797003 - time (sec): 446.70 - samples/sec: 174.19 - lr: 0.000005
1033
- 2023-07-26 20:16:04,601 ----------------------------------------------------------------------------------------------------
1034
- 2023-07-26 20:16:04,602 EPOCH 46 done: loss 0.2475 - lr 0.000005
1035
- 2023-07-26 20:16:06,359 Evaluating as a multi-label problem: False
1036
- 2023-07-26 20:16:06,401 DEV : loss 0.2502936124801636 - f1-score (micro avg) 0.9796
1037
- 2023-07-26 20:16:06,411 ----------------------------------------------------------------------------------------------------
1038
- 2023-07-26 20:16:50,970 epoch 47 - iter 24/243 - loss 0.24652539 - time (sec): 44.56 - samples/sec: 177.11 - lr: 0.000004
1039
- 2023-07-26 20:17:35,687 epoch 47 - iter 48/243 - loss 0.25432254 - time (sec): 89.28 - samples/sec: 178.43 - lr: 0.000004
1040
- 2023-07-26 20:18:20,313 epoch 47 - iter 72/243 - loss 0.24907829 - time (sec): 133.90 - samples/sec: 178.67 - lr: 0.000004
1041
- 2023-07-26 20:19:04,573 epoch 47 - iter 96/243 - loss 0.25143514 - time (sec): 178.16 - samples/sec: 175.41 - lr: 0.000004
1042
- 2023-07-26 20:19:49,067 epoch 47 - iter 120/243 - loss 0.25195942 - time (sec): 222.66 - samples/sec: 174.82 - lr: 0.000004
1043
- 2023-07-26 20:20:33,729 epoch 47 - iter 144/243 - loss 0.25140692 - time (sec): 267.32 - samples/sec: 175.12 - lr: 0.000004
1044
- 2023-07-26 20:21:18,294 epoch 47 - iter 168/243 - loss 0.25098133 - time (sec): 311.88 - samples/sec: 175.27 - lr: 0.000004
1045
- 2023-07-26 20:22:02,731 epoch 47 - iter 192/243 - loss 0.24903435 - time (sec): 356.32 - samples/sec: 174.38 - lr: 0.000004
1046
- 2023-07-26 20:22:47,241 epoch 47 - iter 216/243 - loss 0.24707558 - time (sec): 400.83 - samples/sec: 174.35 - lr: 0.000004
1047
- 2023-07-26 20:23:31,808 epoch 47 - iter 240/243 - loss 0.24996260 - time (sec): 445.40 - samples/sec: 174.50 - lr: 0.000003
1048
- 2023-07-26 20:23:36,885 ----------------------------------------------------------------------------------------------------
1049
- 2023-07-26 20:23:36,885 EPOCH 47 done: loss 0.2500 - lr 0.000003
1050
- 2023-07-26 20:23:38,718 Evaluating as a multi-label problem: False
1051
- 2023-07-26 20:23:38,760 DEV : loss 0.25260353088378906 - f1-score (micro avg) 0.9788
1052
- 2023-07-26 20:23:38,770 ----------------------------------------------------------------------------------------------------
1053
- 2023-07-26 20:24:23,284 epoch 48 - iter 24/243 - loss 0.26092477 - time (sec): 44.51 - samples/sec: 173.72 - lr: 0.000003
1054
- 2023-07-26 20:25:07,731 epoch 48 - iter 48/243 - loss 0.26380496 - time (sec): 88.96 - samples/sec: 172.51 - lr: 0.000003
1055
- 2023-07-26 20:25:52,549 epoch 48 - iter 72/243 - loss 0.26586966 - time (sec): 133.78 - samples/sec: 175.68 - lr: 0.000003
1056
- 2023-07-26 20:26:37,081 epoch 48 - iter 96/243 - loss 0.26118560 - time (sec): 178.31 - samples/sec: 175.37 - lr: 0.000003
1057
- 2023-07-26 20:27:21,769 epoch 48 - iter 120/243 - loss 0.25715945 - time (sec): 223.00 - samples/sec: 176.11 - lr: 0.000003
1058
- 2023-07-26 20:28:06,589 epoch 48 - iter 144/243 - loss 0.25935501 - time (sec): 267.82 - samples/sec: 176.32 - lr: 0.000003
1059
- 2023-07-26 20:28:51,230 epoch 48 - iter 168/243 - loss 0.25807126 - time (sec): 312.46 - samples/sec: 175.36 - lr: 0.000003
1060
- 2023-07-26 20:29:35,872 epoch 48 - iter 192/243 - loss 0.25819322 - time (sec): 357.10 - samples/sec: 174.73 - lr: 0.000003
1061
- 2023-07-26 20:30:20,621 epoch 48 - iter 216/243 - loss 0.25780077 - time (sec): 401.85 - samples/sec: 174.84 - lr: 0.000002
1062
- 2023-07-26 20:31:05,115 epoch 48 - iter 240/243 - loss 0.25669533 - time (sec): 446.34 - samples/sec: 174.17 - lr: 0.000002
1063
- 2023-07-26 20:31:10,189 ----------------------------------------------------------------------------------------------------
1064
- 2023-07-26 20:31:10,189 EPOCH 48 done: loss 0.2562 - lr 0.000002
1065
- 2023-07-26 20:31:11,946 Evaluating as a multi-label problem: False
1066
- 2023-07-26 20:31:11,989 DEV : loss 0.2517630159854889 - f1-score (micro avg) 0.9793
1067
- 2023-07-26 20:31:11,998 ----------------------------------------------------------------------------------------------------
1068
- 2023-07-26 20:31:56,576 epoch 49 - iter 24/243 - loss 0.27952006 - time (sec): 44.58 - samples/sec: 171.79 - lr: 0.000002
1069
- 2023-07-26 20:32:41,285 epoch 49 - iter 48/243 - loss 0.26483505 - time (sec): 89.29 - samples/sec: 172.32 - lr: 0.000002
1070
- 2023-07-26 20:33:25,782 epoch 49 - iter 72/243 - loss 0.25971199 - time (sec): 133.78 - samples/sec: 171.90 - lr: 0.000002
1071
- 2023-07-26 20:34:10,460 epoch 49 - iter 96/243 - loss 0.25971123 - time (sec): 178.46 - samples/sec: 173.31 - lr: 0.000002
1072
- 2023-07-26 20:34:55,145 epoch 49 - iter 120/243 - loss 0.25121870 - time (sec): 223.15 - samples/sec: 174.45 - lr: 0.000002
1073
- 2023-07-26 20:35:39,794 epoch 49 - iter 144/243 - loss 0.24985456 - time (sec): 267.80 - samples/sec: 174.14 - lr: 0.000002
1074
- 2023-07-26 20:36:24,454 epoch 49 - iter 168/243 - loss 0.25019492 - time (sec): 312.46 - samples/sec: 173.74 - lr: 0.000002
1075
- 2023-07-26 20:37:09,180 epoch 49 - iter 192/243 - loss 0.24964407 - time (sec): 357.18 - samples/sec: 174.05 - lr: 0.000001
1076
- 2023-07-26 20:37:53,667 epoch 49 - iter 216/243 - loss 0.24966262 - time (sec): 401.67 - samples/sec: 173.91 - lr: 0.000001
1077
- 2023-07-26 20:38:38,222 epoch 49 - iter 240/243 - loss 0.24839303 - time (sec): 446.22 - samples/sec: 173.82 - lr: 0.000001
1078
- 2023-07-26 20:38:43,407 ----------------------------------------------------------------------------------------------------
1079
- 2023-07-26 20:38:43,407 EPOCH 49 done: loss 0.2480 - lr 0.000001
1080
- 2023-07-26 20:38:45,164 Evaluating as a multi-label problem: False
1081
- 2023-07-26 20:38:45,206 DEV : loss 0.25181668996810913 - f1-score (micro avg) 0.9786
1082
- 2023-07-26 20:38:45,216 ----------------------------------------------------------------------------------------------------
1083
- 2023-07-26 20:39:30,103 epoch 50 - iter 24/243 - loss 0.26114983 - time (sec): 44.89 - samples/sec: 184.97 - lr: 0.000001
1084
- 2023-07-26 20:40:14,469 epoch 50 - iter 48/243 - loss 0.24629344 - time (sec): 89.25 - samples/sec: 177.23 - lr: 0.000001
1085
- 2023-07-26 20:40:58,962 epoch 50 - iter 72/243 - loss 0.24771674 - time (sec): 133.75 - samples/sec: 176.12 - lr: 0.000001
1086
- 2023-07-26 20:41:43,633 epoch 50 - iter 96/243 - loss 0.24705085 - time (sec): 178.42 - samples/sec: 176.67 - lr: 0.000001
1087
- 2023-07-26 20:42:28,058 epoch 50 - iter 120/243 - loss 0.24435267 - time (sec): 222.84 - samples/sec: 175.63 - lr: 0.000001
1088
- 2023-07-26 20:43:12,552 epoch 50 - iter 144/243 - loss 0.24537610 - time (sec): 267.34 - samples/sec: 175.26 - lr: 0.000001
1089
- 2023-07-26 20:43:57,183 epoch 50 - iter 168/243 - loss 0.24725247 - time (sec): 311.97 - samples/sec: 175.35 - lr: 0.000000
1090
- 2023-07-26 20:44:42,166 epoch 50 - iter 192/243 - loss 0.24773009 - time (sec): 356.95 - samples/sec: 174.58 - lr: 0.000000
1091
- 2023-07-26 20:45:27,096 epoch 50 - iter 216/243 - loss 0.24906212 - time (sec): 401.88 - samples/sec: 173.96 - lr: 0.000000
1092
- 2023-07-26 20:46:12,548 epoch 50 - iter 240/243 - loss 0.24977353 - time (sec): 447.33 - samples/sec: 173.87 - lr: 0.000000
1093
- 2023-07-26 20:46:17,709 ----------------------------------------------------------------------------------------------------
1094
- 2023-07-26 20:46:17,709 EPOCH 50 done: loss 0.2503 - lr 0.000000
1095
- 2023-07-26 20:46:19,451 Evaluating as a multi-label problem: False
1096
- 2023-07-26 20:46:19,493 DEV : loss 0.2513697147369385 - f1-score (micro avg) 0.9784
1097
- 2023-07-26 20:46:22,002 Test data not provided setting final score to 0
 
1
+ 2023-08-17 13:20:08,970 ----------------------------------------------------------------------------------------------------
2
+ 2023-08-17 13:20:08,977 Model: "SequenceTagger(
3
  (embeddings): TransformerWordEmbeddings(
4
  (model): XLMRobertaModel(
5
  (embeddings): XLMRobertaEmbeddings(
 
313
  (loss_function): ViterbiLoss()
314
  (crf): CRF()
315
  )"
316
+ 2023-08-17 13:20:08,995 ----------------------------------------------------------------------------------------------------
317
+ 2023-08-17 13:20:08,996 Corpus: "Corpus: 7767 train + 409 dev + 0 test sentences"
318
+ 2023-08-17 13:20:08,997 ----------------------------------------------------------------------------------------------------
319
+ 2023-08-17 13:20:08,997 Parameters:
320
+ 2023-08-17 13:20:08,997 - learning_rate: "0.000050"
321
+ 2023-08-17 13:20:08,998 - mini_batch_size: "32"
322
+ 2023-08-17 13:20:08,998 - patience: "3"
323
+ 2023-08-17 13:20:08,998 - anneal_factor: "0.5"
324
+ 2023-08-17 13:20:08,999 - max_epochs: "2"
325
+ 2023-08-17 13:20:08,999 - shuffle: "True"
326
+ 2023-08-17 13:20:09,000 - train_with_dev: "False"
327
+ 2023-08-17 13:20:09,000 - batch_growth_annealing: "False"
328
+ 2023-08-17 13:20:09,000 ----------------------------------------------------------------------------------------------------
329
+ 2023-08-17 13:20:09,001 Model training base path: "/scratch/skulick/ppchy-11-pos/xlmb-ck05-yid1/split_final/train"
330
+ 2023-08-17 13:20:09,001 ----------------------------------------------------------------------------------------------------
331
+ 2023-08-17 13:20:09,001 Device: cuda:0
332
+ 2023-08-17 13:20:09,002 ----------------------------------------------------------------------------------------------------
333
+ 2023-08-17 13:20:09,002 Embeddings storage mode: none
334
+ 2023-08-17 13:20:09,002 ----------------------------------------------------------------------------------------------------
335
+ 2023-08-17 13:21:05,834 epoch 1 - iter 24/243 - loss 5.52841502 - time (sec): 56.83 - samples/sec: 131.44 - lr: 0.000025
336
+ 2023-08-17 13:22:03,318 epoch 1 - iter 48/243 - loss 4.70686211 - time (sec): 114.32 - samples/sec: 130.45 - lr: 0.000050
337
+ 2023-08-17 13:23:00,549 epoch 1 - iter 72/243 - loss 3.86110162 - time (sec): 171.55 - samples/sec: 131.94 - lr: 0.000047
338
+ 2023-08-17 13:23:57,695 epoch 1 - iter 96/243 - loss 3.22106003 - time (sec): 228.69 - samples/sec: 132.37 - lr: 0.000045
339
+ 2023-08-17 13:24:55,039 epoch 1 - iter 120/243 - loss 2.77518007 - time (sec): 286.04 - samples/sec: 132.92 - lr: 0.000042
340
+ 2023-08-17 13:25:52,345 epoch 1 - iter 144/243 - loss 2.46009763 - time (sec): 343.34 - samples/sec: 133.06 - lr: 0.000039
341
+ 2023-08-17 13:26:49,831 epoch 1 - iter 168/243 - loss 2.21288400 - time (sec): 400.83 - samples/sec: 134.04 - lr: 0.000036
342
+ 2023-08-17 13:27:47,964 epoch 1 - iter 192/243 - loss 2.01670410 - time (sec): 458.96 - samples/sec: 134.63 - lr: 0.000034
343
+ 2023-08-17 13:28:45,494 epoch 1 - iter 216/243 - loss 1.86783335 - time (sec): 516.49 - samples/sec: 134.47 - lr: 0.000031
344
+ 2023-08-17 13:29:43,119 epoch 1 - iter 240/243 - loss 1.74523925 - time (sec): 574.12 - samples/sec: 135.25 - lr: 0.000028
345
+ 2023-08-17 13:29:50,011 ----------------------------------------------------------------------------------------------------
346
+ 2023-08-17 13:29:50,011 EPOCH 1 done: loss 1.7334 - lr 0.000028
347
+ 2023-08-17 13:29:52,277 Evaluating as a multi-label problem: False
348
+ 2023-08-17 13:29:52,376 DEV : loss 0.3509514629840851 - f1-score (micro avg) 0.9331
349
+ 2023-08-17 13:29:52,410 saving best model
350
+ 2023-08-17 13:29:54,774 ----------------------------------------------------------------------------------------------------
351
+ 2023-08-17 13:30:44,972 epoch 2 - iter 24/243 - loss 0.58877620 - time (sec): 50.20 - samples/sec: 152.66 - lr: 0.000025
352
+ 2023-08-17 13:31:36,455 epoch 2 - iter 48/243 - loss 0.60804646 - time (sec): 101.68 - samples/sec: 152.75 - lr: 0.000022
353
+ 2023-08-17 13:32:27,132 epoch 2 - iter 72/243 - loss 0.60136722 - time (sec): 152.36 - samples/sec: 153.64 - lr: 0.000020
354
+ 2023-08-17 13:33:17,902 epoch 2 - iter 96/243 - loss 0.59255541 - time (sec): 203.13 - samples/sec: 154.55 - lr: 0.000017
355
+ 2023-08-17 13:34:08,949 epoch 2 - iter 120/243 - loss 0.58957421 - time (sec): 254.17 - samples/sec: 154.79 - lr: 0.000014
356
+ 2023-08-17 13:35:00,256 epoch 2 - iter 144/243 - loss 0.58878210 - time (sec): 305.48 - samples/sec: 154.48 - lr: 0.000011
357
+ 2023-08-17 13:35:51,214 epoch 2 - iter 168/243 - loss 0.58168957 - time (sec): 356.44 - samples/sec: 153.84 - lr: 0.000009
358
+ 2023-08-17 13:36:42,167 epoch 2 - iter 192/243 - loss 0.57403444 - time (sec): 407.39 - samples/sec: 153.55 - lr: 0.000006
359
+ 2023-08-17 13:37:32,761 epoch 2 - iter 216/243 - loss 0.57331317 - time (sec): 457.99 - samples/sec: 152.68 - lr: 0.000003
360
+ 2023-08-17 13:38:23,745 epoch 2 - iter 240/243 - loss 0.56849021 - time (sec): 508.97 - samples/sec: 152.71 - lr: 0.000000
361
+ 2023-08-17 13:38:29,500 ----------------------------------------------------------------------------------------------------
362
+ 2023-08-17 13:38:29,500 EPOCH 2 done: loss 0.5679 - lr 0.000000
363
+ 2023-08-17 13:38:31,769 Evaluating as a multi-label problem: False
364
+ 2023-08-17 13:38:31,868 DEV : loss 0.23018118739128113 - f1-score (micro avg) 0.9562
365
+ 2023-08-17 13:38:31,902 saving best model
366
+ 2023-08-17 13:38:37,560 Test data not provided setting final score to 0