2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
2024-07-29 09:42:42,423 Training Model
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
2024-07-29 09:42:42,423 Translator(
  (encoder): EncoderLSTM(
    (embedding): Embedding(22834, 300, padding_idx=0)
    (dropout): Dropout(p=0.1, inplace=False)
    (lstm): LSTM(300, 512, batch_first=True)
  )
  (decoder): DecoderLSTM(
    (embedding): Embedding(14303, 300, padding_idx=0)
    (dropout): Dropout(p=0.1, inplace=False)
    (lstm): LSTM(300, 512, batch_first=True)
    (attention): DotProductAttention(
      (softmax): Softmax(dim=-1)
      (combined2hidden): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): ReLU()
      )
    )
    (hidden2vocab): Linear(in_features=512, out_features=14303, bias=True)
    (log_softmax): LogSoftmax(dim=-1)
  )
)
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
2024-07-29 09:42:42,423 Training Hyperparameters:
2024-07-29 09:42:42,423 - max_epochs: 10
2024-07-29 09:42:42,423 - learning_rate: 0.001
2024-07-29 09:42:42,423 - batch_size: 128
2024-07-29 09:42:42,423 - patience: 5
2024-07-29 09:42:42,423 - scheduler_patience: 3
2024-07-29 09:42:42,423 - teacher_forcing_ratio: 0.5
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
2024-07-29 09:42:42,423 Computational Parameters:
2024-07-29 09:42:42,423 - num_workers: 4
2024-07-29 09:42:42,423 - device: device(type='cuda', index=0)
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
2024-07-29 09:42:42,423 Dataset Splits:
2024-07-29 09:42:42,423 - train: 133623 data points
2024-07-29 09:42:42,423 - dev: 19090 data points
2024-07-29 09:42:42,423 - test: 38179 data points
2024-07-29 09:42:42,424 ----------------------------------------------------------------------------------------------------
2024-07-29 09:42:42,424 EPOCH 1
2024-07-29 09:43:17,154 batch 104/1044 - loss 6.42034669 - lr 0.0010 - time 34.73s
2024-07-29 09:43:54,707 batch 208/1044 - loss 6.17063731 - lr 0.0010 - time 72.28s
2024-07-29 09:44:32,541 batch 312/1044 - loss 6.00517355 - lr 0.0010 - time 110.12s
2024-07-29 09:45:10,795 batch 416/1044 - loss 5.87077612 - lr 0.0010 - time 148.37s
2024-07-29 09:45:47,671 batch 520/1044 - loss 5.75463560 - lr 0.0010 - time 185.25s
2024-07-29 09:46:24,949 batch 624/1044 - loss 5.65824632 - lr 0.0010 - time 222.53s
2024-07-29 09:47:03,552 batch 728/1044 - loss 5.56939856 - lr 0.0010 - time 261.13s
2024-07-29 09:47:40,687 batch 832/1044 - loss 5.49128213 - lr 0.0010 - time 298.26s
2024-07-29 09:48:18,127 batch 936/1044 - loss 5.41966415 - lr 0.0010 - time 335.70s
2024-07-29 09:48:55,119 batch 1040/1044 - loss 5.35489992 - lr 0.0010 - time 372.70s
2024-07-29 09:48:56,680 ----------------------------------------------------------------------------------------------------
2024-07-29 09:48:56,681 EPOCH 1 DONE
2024-07-29 09:49:06,113 TRAIN Loss: 5.3525
2024-07-29 09:49:06,114 DEV Loss: 5.5692
2024-07-29 09:49:06,114 DEV Perplexity: 262.2315
2024-07-29 09:49:06,114 New best score!
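
[Note: the DEV Perplexity values reported after each epoch appear to be the exponential of the corresponding per-token cross-entropy DEV loss; a minimal sketch checking this against the epoch 1 numbers above. The relationship is inferred from the log, not taken from the training code.]

# Minimal sketch (assumed relationship): perplexity = exp(per-token cross-entropy loss)
import math

dev_loss = 5.5692           # "DEV Loss" after epoch 1, as printed above
print(math.exp(dev_loss))   # ~262.22, i.e. the reported "DEV Perplexity: 262.2315"
                            # up to rounding of the printed loss
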
2024-07-29 09:49:06,115 ----------------------------------------------------------------------------------------------------
2024-07-29 09:49:06,115 EPOCH 2
2024-07-29 09:49:41,222 batch 104/1044 - loss 4.62738995 - lr 0.0010 - time 35.11s
2024-07-29 09:50:17,864 batch 208/1044 - loss 4.59759969 - lr 0.0010 - time 71.75s
2024-07-29 09:50:53,411 batch 312/1044 - loss 4.57657494 - lr 0.0010 - time 107.30s
2024-07-29 09:51:31,209 batch 416/1044 - loss 4.54348163 - lr 0.0010 - time 145.09s
2024-07-29 09:52:11,697 batch 520/1044 - loss 4.51823422 - lr 0.0010 - time 185.58s
2024-07-29 09:52:48,926 batch 624/1044 - loss 4.49001330 - lr 0.0010 - time 222.81s
2024-07-29 09:53:24,588 batch 728/1044 - loss 4.46876206 - lr 0.0010 - time 258.47s
2024-07-29 09:54:02,468 batch 832/1044 - loss 4.44477118 - lr 0.0010 - time 296.35s
2024-07-29 09:54:39,911 batch 936/1044 - loss 4.42371725 - lr 0.0010 - time 333.80s
2024-07-29 09:55:16,492 batch 1040/1044 - loss 4.40068238 - lr 0.0010 - time 370.38s
2024-07-29 09:55:18,277 ----------------------------------------------------------------------------------------------------
2024-07-29 09:55:18,279 EPOCH 2 DONE
2024-07-29 09:55:27,546 TRAIN Loss: 4.3997
2024-07-29 09:55:27,546 DEV Loss: 5.2857
2024-07-29 09:55:27,546 DEV Perplexity: 197.4908
2024-07-29 09:55:27,546 New best score!
2024-07-29 09:55:27,547 ----------------------------------------------------------------------------------------------------
2024-07-29 09:55:27,547 EPOCH 3
2024-07-29 09:56:04,874 batch 104/1044 - loss 4.04292682 - lr 0.0010 - time 37.33s
2024-07-29 09:56:44,240 batch 208/1044 - loss 4.04458403 - lr 0.0010 - time 76.69s
2024-07-29 09:57:19,595 batch 312/1044 - loss 4.04015087 - lr 0.0010 - time 112.05s
2024-07-29 09:57:58,341 batch 416/1044 - loss 4.03473626 - lr 0.0010 - time 150.79s
2024-07-29 09:58:33,685 batch 520/1044 - loss 4.02294693 - lr 0.0010 - time 186.14s
2024-07-29 09:59:09,374 batch 624/1044 - loss 4.00945110 - lr 0.0010 - time 221.83s
2024-07-29 09:59:49,125 batch 728/1044 - loss 4.00042684 - lr 0.0010 - time 261.58s
2024-07-29 10:00:26,299 batch 832/1044 - loss 3.99049270 - lr 0.0010 - time 298.75s
2024-07-29 10:01:03,713 batch 936/1044 - loss 3.97934972 - lr 0.0010 - time 336.17s
2024-07-29 10:01:40,625 batch 1040/1044 - loss 3.96891846 - lr 0.0010 - time 373.08s
2024-07-29 10:01:41,787 ----------------------------------------------------------------------------------------------------
2024-07-29 10:01:41,789 EPOCH 3 DONE
2024-07-29 10:01:51,163 TRAIN Loss: 3.9687
2024-07-29 10:01:51,163 DEV Loss: 5.2440
2024-07-29 10:01:51,163 DEV Perplexity: 189.4295
2024-07-29 10:01:51,163 New best score!
2024-07-29 10:01:51,164 ----------------------------------------------------------------------------------------------------
2024-07-29 10:01:51,164 EPOCH 4
2024-07-29 10:02:31,057 batch 104/1044 - loss 3.74893653 - lr 0.0010 - time 39.89s
2024-07-29 10:03:05,331 batch 208/1044 - loss 3.75399486 - lr 0.0010 - time 74.17s
2024-07-29 10:03:41,466 batch 312/1044 - loss 3.75771751 - lr 0.0010 - time 110.30s
2024-07-29 10:04:15,960 batch 416/1044 - loss 3.75979321 - lr 0.0010 - time 144.80s
2024-07-29 10:04:55,428 batch 520/1044 - loss 3.75057765 - lr 0.0010 - time 184.26s
2024-07-29 10:05:33,137 batch 624/1044 - loss 3.74305481 - lr 0.0010 - time 221.97s
2024-07-29 10:06:09,059 batch 728/1044 - loss 3.73923583 - lr 0.0010 - time 257.89s
2024-07-29 10:06:47,012 batch 832/1044 - loss 3.73675085 - lr 0.0010 - time 295.85s
2024-07-29 10:07:23,641 batch 936/1044 - loss 3.73419790 - lr 0.0010 - time 332.48s
2024-07-29 10:07:58,748 batch 1040/1044 - loss 3.72953442 - lr 0.0010 - time 367.58s
2024-07-29 10:08:00,245 ----------------------------------------------------------------------------------------------------
2024-07-29 10:08:00,246 EPOCH 4 DONE
2024-07-29 10:08:09,716 TRAIN Loss: 3.7292
2024-07-29 10:08:09,717 DEV Loss: 5.1546
2024-07-29 10:08:09,717 DEV Perplexity: 173.2260
2024-07-29 10:08:09,717 New best score!
2024-07-29 10:08:09,718 ----------------------------------------------------------------------------------------------------
2024-07-29 10:08:09,718 EPOCH 5
2024-07-29 10:08:48,898 batch 104/1044 - loss 3.53810529 - lr 0.0010 - time 39.18s
2024-07-29 10:09:24,261 batch 208/1044 - loss 3.54713277 - lr 0.0010 - time 74.54s
2024-07-29 10:09:59,554 batch 312/1044 - loss 3.55520624 - lr 0.0010 - time 109.84s
2024-07-29 10:10:35,964 batch 416/1044 - loss 3.54529557 - lr 0.0010 - time 146.25s
2024-07-29 10:11:13,273 batch 520/1044 - loss 3.53952308 - lr 0.0010 - time 183.56s
2024-07-29 10:11:49,699 batch 624/1044 - loss 3.53902453 - lr 0.0010 - time 219.98s
2024-07-29 10:12:26,577 batch 728/1044 - loss 3.54207764 - lr 0.0010 - time 256.86s
2024-07-29 10:13:03,988 batch 832/1044 - loss 3.54191658 - lr 0.0010 - time 294.27s
2024-07-29 10:13:44,152 batch 936/1044 - loss 3.54287420 - lr 0.0010 - time 334.43s
2024-07-29 10:14:19,848 batch 1040/1044 - loss 3.54355186 - lr 0.0010 - time 370.13s
2024-07-29 10:14:21,679 ----------------------------------------------------------------------------------------------------
2024-07-29 10:14:21,680 EPOCH 5 DONE
2024-07-29 10:14:31,157 TRAIN Loss: 3.5436
2024-07-29 10:14:31,157 DEV Loss: 5.1595
2024-07-29 10:14:31,157 DEV Perplexity: 174.0773
2024-07-29 10:14:31,157 No improvement for 1 epoch(s)
2024-07-29 10:14:31,157 ----------------------------------------------------------------------------------------------------
2024-07-29 10:14:31,157 EPOCH 6
2024-07-29 10:15:09,004 batch 104/1044 - loss 3.37988193 - lr 0.0010 - time 37.85s
2024-07-29 10:15:46,449 batch 208/1044 - loss 3.39972965 - lr 0.0010 - time 75.29s
2024-07-29 10:16:23,877 batch 312/1044 - loss 3.41839841 - lr 0.0010 - time 112.72s
2024-07-29 10:17:02,860 batch 416/1044 - loss 3.42049147 - lr 0.0010 - time 151.70s
2024-07-29 10:17:39,715 batch 520/1044 - loss 3.42189572 - lr 0.0010 - time 188.56s
2024-07-29 10:18:16,287 batch 624/1044 - loss 3.41934290 - lr 0.0010 - time 225.13s
2024-07-29 10:18:49,350 batch 728/1044 - loss 3.42369204 - lr 0.0010 - time 258.19s
2024-07-29 10:19:27,406 batch 832/1044 - loss 3.42245102 - lr 0.0010 - time 296.25s
2024-07-29 10:20:04,324 batch 936/1044 - loss 3.42058108 - lr 0.0010 - time 333.17s
2024-07-29 10:20:39,261 batch 1040/1044 - loss 3.42255051 - lr 0.0010 - time 368.10s
2024-07-29 10:20:43,715 ----------------------------------------------------------------------------------------------------
2024-07-29 10:20:43,717 EPOCH 6 DONE
2024-07-29 10:20:53,217 TRAIN Loss: 3.4223
2024-07-29 10:20:53,218 DEV Loss: 5.1826
2024-07-29 10:20:53,218 DEV Perplexity: 178.1495
2024-07-29 10:20:53,218 No improvement for 2 epoch(s)
2024-07-29 10:20:53,218 ----------------------------------------------------------------------------------------------------
2024-07-29 10:20:53,218 EPOCH 7
2024-07-29 10:21:31,444 batch 104/1044 - loss 3.29632874 - lr 0.0010 - time 38.23s
2024-07-29 10:22:10,060 batch 208/1044 - loss 3.29179441 - lr 0.0010 - time 76.84s
2024-07-29 10:22:45,065 batch 312/1044 - loss 3.28852440 - lr 0.0010 - time 111.85s
2024-07-29 10:23:21,129 batch 416/1044 - loss 3.29654682 - lr 0.0010 - time 147.91s
2024-07-29 10:23:58,897 batch 520/1044 - loss 3.30062932 - lr 0.0010 - time 185.68s
2024-07-29 10:24:37,910 batch 624/1044 - loss 3.31254658 - lr 0.0010 - time 224.69s
2024-07-29 10:25:15,978 batch 728/1044 - loss 3.31376025 - lr 0.0010 - time 262.76s
2024-07-29 10:25:53,003 batch 832/1044 - loss 3.31953892 - lr 0.0010 - time 299.79s
2024-07-29 10:26:30,024 batch 936/1044 - loss 3.32268426 - lr 0.0010 - time 336.81s
2024-07-29 10:27:05,685 batch 1040/1044 - loss 3.32460238 - lr 0.0010 - time 372.47s
2024-07-29 10:27:06,955 ----------------------------------------------------------------------------------------------------
2024-07-29 10:27:06,957 EPOCH 7 DONE
2024-07-29 10:27:16,539 TRAIN Loss: 3.3246
2024-07-29 10:27:16,539 DEV Loss: 5.2310
2024-07-29 10:27:16,539 DEV Perplexity: 186.9724
2024-07-29 10:27:16,539 No improvement for 3 epoch(s)
2024-07-29 10:27:16,539 ----------------------------------------------------------------------------------------------------
2024-07-29 10:27:16,539 EPOCH 8
2024-07-29 10:27:55,681 batch 104/1044 - loss 3.18067933 - lr 0.0010 - time 39.14s
2024-07-29 10:28:30,973 batch 208/1044 - loss 3.20228673 - lr 0.0010 - time 74.43s
2024-07-29 10:29:06,064 batch 312/1044 - loss 3.20549937 - lr 0.0010 - time 109.53s
2024-07-29 10:29:43,870 batch 416/1044 - loss 3.21897588 - lr 0.0010 - time 147.33s
2024-07-29 10:30:19,159 batch 520/1044 - loss 3.22153870 - lr 0.0010 - time 182.62s
2024-07-29 10:30:55,565 batch 624/1044 - loss 3.22599725 - lr 0.0010 - time 219.03s
2024-07-29 10:31:33,714 batch 728/1044 - loss 3.22878759 - lr 0.0010 - time 257.18s
2024-07-29 10:32:10,440 batch 832/1044 - loss 3.23212968 - lr 0.0010 - time 293.90s
2024-07-29 10:32:48,422 batch 936/1044 - loss 3.23624962 - lr 0.0010 - time 331.88s
2024-07-29 10:33:24,964 batch 1040/1044 - loss 3.23659680 - lr 0.0010 - time 368.42s
2024-07-29 10:33:26,214 ----------------------------------------------------------------------------------------------------
2024-07-29 10:33:26,216 EPOCH 8 DONE
2024-07-29 10:33:35,755 TRAIN Loss: 3.2367
2024-07-29 10:33:35,756 DEV Loss: 5.2968
2024-07-29 10:33:35,756 DEV Perplexity: 199.6878
2024-07-29 10:33:35,756 No improvement for 4 epoch(s)
2024-07-29 10:33:35,756 ----------------------------------------------------------------------------------------------------
2024-07-29 10:33:35,756 EPOCH 9
2024-07-29 10:34:15,083 batch 104/1044 - loss 3.08033091 - lr 0.0001 - time 39.33s
2024-07-29 10:34:52,691 batch 208/1044 - loss 3.07522689 - lr 0.0001 - time 76.93s
2024-07-29 10:35:29,151 batch 312/1044 - loss 3.06626054 - lr 0.0001 - time 113.39s
2024-07-29 10:36:06,720 batch 416/1044 - loss 3.06839789 - lr 0.0001 - time 150.96s
2024-07-29 10:36:41,167 batch 520/1044 - loss 3.06539460 - lr 0.0001 - time 185.41s
2024-07-29 10:37:17,074 batch 624/1044 - loss 3.06574041 - lr 0.0001 - time 221.32s
2024-07-29 10:37:54,392 batch 728/1044 - loss 3.06843089 - lr 0.0001 - time 258.64s
2024-07-29 10:38:31,689 batch 832/1044 - loss 3.06777010 - lr 0.0001 - time 295.93s
2024-07-29 10:39:06,956 batch 936/1044 - loss 3.06646013 - lr 0.0001 - time 331.20s
2024-07-29 10:39:45,993 batch 1040/1044 - loss 3.06478271 - lr 0.0001 - time 370.24s
2024-07-29 10:39:47,096 ----------------------------------------------------------------------------------------------------
2024-07-29 10:39:47,098 EPOCH 9 DONE
2024-07-29 10:39:56,496 TRAIN Loss: 3.0646
2024-07-29 10:39:56,497 DEV Loss: 5.1945
2024-07-29 10:39:56,497 DEV Perplexity: 180.2739
2024-07-29 10:39:56,497 No improvement for 5 epoch(s)
2024-07-29 10:39:56,497 Patience reached: Terminating model training due to early stopping
2024-07-29 10:39:56,497 ----------------------------------------------------------------------------------------------------
2024-07-29 10:39:56,497 Finished Training
2024-07-29 10:40:14,449 TEST Perplexity: 173.0781
2024-07-29 10:49:34,588 TEST BLEU = 17.27 82.9/65.2/22.1/0.7 (BP = 1.000 ratio = 1.000 hyp_len = 70 ref_len = 70)
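
[Note: for reference, a minimal PyTorch sketch that reproduces the module tree printed at the top of this log. The layer names and dimensions (vocabulary sizes 22834/14303, 300-dimensional embeddings, 512-dimensional hidden states, dropout p=0.1) are taken from the printout; the forward passes below are illustrative assumptions, not the actual training code.]

import torch
import torch.nn as nn


class EncoderLSTM(nn.Module):
    def __init__(self, vocab_size=22834, embed_dim=300, hidden_dim=512, dropout=0.1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.dropout = nn.Dropout(p=dropout)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)

    def forward(self, src):                       # src: (batch, src_len)
        embedded = self.dropout(self.embedding(src))
        outputs, hidden = self.lstm(embedded)     # outputs: (batch, src_len, hidden)
        return outputs, hidden


class DotProductAttention(nn.Module):
    def __init__(self, hidden_dim=512):
        super().__init__()
        self.softmax = nn.Softmax(dim=-1)
        self.combined2hidden = nn.Sequential(
            nn.Linear(2 * hidden_dim, hidden_dim),
            nn.ReLU(),
        )

    def forward(self, decoder_out, encoder_out):
        # decoder_out: (batch, tgt_len, hidden); encoder_out: (batch, src_len, hidden)
        scores = torch.bmm(decoder_out, encoder_out.transpose(1, 2))
        weights = self.softmax(scores)             # (batch, tgt_len, src_len)
        context = torch.bmm(weights, encoder_out)  # (batch, tgt_len, hidden)
        return self.combined2hidden(torch.cat([decoder_out, context], dim=-1))


class DecoderLSTM(nn.Module):
    def __init__(self, vocab_size=14303, embed_dim=300, hidden_dim=512, dropout=0.1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.dropout = nn.Dropout(p=dropout)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.attention = DotProductAttention(hidden_dim)
        self.hidden2vocab = nn.Linear(hidden_dim, vocab_size)
        self.log_softmax = nn.LogSoftmax(dim=-1)

    def forward(self, tgt, hidden, encoder_out):   # tgt: (batch, tgt_len)
        embedded = self.dropout(self.embedding(tgt))
        decoder_out, hidden = self.lstm(embedded, hidden)
        attended = self.attention(decoder_out, encoder_out)
        return self.log_softmax(self.hidden2vocab(attended)), hidden


class Translator(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = EncoderLSTM()
        self.decoder = DecoderLSTM()

    def forward(self, src, tgt):
        encoder_out, hidden = self.encoder(src)
        log_probs, _ = self.decoder(tgt, hidden, encoder_out)
        return log_probs                           # (batch, tgt_len, 14303)


if __name__ == "__main__":
    model = Translator()
    print(model)   # prints a module tree matching the one at the top of this log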