|
2024-07-29 09:14:44,992 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:14:44,992 Training Model |
|
2024-07-29 09:14:44,992 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:14:44,992 Translator( |
|
(encoder): EncoderLSTM( |
|
(embedding): Embedding(14303, 300, padding_idx=0) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(lstm): LSTM(300, 512, batch_first=True) |
|
) |
|
(decoder): DecoderLSTM( |
|
(embedding): Embedding(22834, 300, padding_idx=0) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(lstm): LSTM(300, 512, batch_first=True) |
|
(attention): DotProductAttention( |
|
(softmax): Softmax(dim=-1) |
|
(combined2hidden): Sequential( |
|
(0): Linear(in_features=1024, out_features=512, bias=True) |
|
(1): ReLU() |
|
) |
|
) |
|
(hidden2vocab): Linear(in_features=512, out_features=22834, bias=True) |
|
(log_softmax): LogSoftmax(dim=-1) |
|
) |
|
) |
|
2024-07-29 09:14:44,992 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:14:44,992 Training Hyperparameters: |
|
2024-07-29 09:14:44,992 - max_epochs: 10 |
|
2024-07-29 09:14:44,992 - learning_rate: 0.001 |
|
2024-07-29 09:14:44,992 - batch_size: 128 |
|
2024-07-29 09:14:44,992 - patience: 5 |
|
2024-07-29 09:14:44,992 - scheduler_patience: 3 |
|
2024-07-29 09:14:44,992 - teacher_forcing_ratio: 0.5 |
|
2024-07-29 09:14:44,992 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:14:44,992 Computational Parameters: |
|
2024-07-29 09:14:44,992 - num_workers: 4 |
|
2024-07-29 09:14:44,992 - device: device(type='cuda', index=0) |
|
2024-07-29 09:14:44,992 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:14:44,992 Dataset Splits: |
|
2024-07-29 09:14:44,992 - train: 133623 data points |
|
2024-07-29 09:14:44,992 - dev: 19090 data points |
|
2024-07-29 09:14:44,992 - test: 38179 data points |
|
2024-07-29 09:14:44,992 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:14:44,992 EPOCH 1 |
|
2024-07-29 09:15:31,214 batch 104/1044 - loss 6.68762229 - lr 0.0010 - time 46.22s |
|
2024-07-29 09:16:13,594 batch 208/1044 - loss 6.40201276 - lr 0.0010 - time 88.60s |
|
2024-07-29 09:17:01,978 batch 312/1044 - loss 6.22368844 - lr 0.0010 - time 136.99s |
|
2024-07-29 09:17:50,779 batch 416/1044 - loss 6.07568611 - lr 0.0010 - time 185.79s |
|
2024-07-29 09:18:39,278 batch 520/1044 - loss 5.94940980 - lr 0.0010 - time 234.29s |
|
2024-07-29 09:19:27,693 batch 624/1044 - loss 5.84451641 - lr 0.0010 - time 282.70s |
|
2024-07-29 09:20:12,651 batch 728/1044 - loss 5.74972569 - lr 0.0010 - time 327.66s |
|
2024-07-29 09:20:56,581 batch 832/1044 - loss 5.66821399 - lr 0.0010 - time 371.59s |
|
2024-07-29 09:21:46,421 batch 936/1044 - loss 5.59120232 - lr 0.0010 - time 421.43s |
|
2024-07-29 09:22:32,658 batch 1040/1044 - loss 5.52066185 - lr 0.0010 - time 467.67s |
|
2024-07-29 09:22:34,966 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:22:34,967 EPOCH 1 DONE |
|
2024-07-29 09:22:44,703 TRAIN Loss: 5.5176 |
|
2024-07-29 09:22:44,703 DEV Loss: 5.6851 |
|
2024-07-29 09:22:44,703 DEV Perplexity: 294.4404 |
|
2024-07-29 09:22:44,703 New best score! |
|
2024-07-29 09:22:44,704 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:22:44,704 EPOCH 2 |
|
2024-07-29 09:23:30,459 batch 104/1044 - loss 4.70927820 - lr 0.0010 - time 45.75s |
|
2024-07-29 09:24:14,570 batch 208/1044 - loss 4.67138333 - lr 0.0010 - time 89.87s |
|
2024-07-29 09:25:04,646 batch 312/1044 - loss 4.64380088 - lr 0.0010 - time 139.94s |
|
2024-07-29 09:25:53,202 batch 416/1044 - loss 4.61442350 - lr 0.0010 - time 188.50s |
|
2024-07-29 09:26:40,127 batch 520/1044 - loss 4.58126969 - lr 0.0010 - time 235.42s |
|
2024-07-29 09:27:23,730 batch 624/1044 - loss 4.56022843 - lr 0.0010 - time 279.03s |
|
2024-07-29 09:28:10,576 batch 728/1044 - loss 4.53555526 - lr 0.0010 - time 325.87s |
|
2024-07-29 09:28:57,617 batch 832/1044 - loss 4.51384200 - lr 0.0010 - time 372.91s |
|
2024-07-29 09:29:44,095 batch 936/1044 - loss 4.49083310 - lr 0.0010 - time 419.39s |
|
2024-07-29 09:30:30,312 batch 1040/1044 - loss 4.46784579 - lr 0.0010 - time 465.61s |
|
2024-07-29 09:30:32,342 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:30:32,343 EPOCH 2 DONE |
|
2024-07-29 09:30:42,331 TRAIN Loss: 4.4668 |
|
2024-07-29 09:30:42,332 DEV Loss: 5.4737 |
|
2024-07-29 09:30:42,332 DEV Perplexity: 238.3490 |
|
2024-07-29 09:30:42,332 New best score! |
|
2024-07-29 09:30:42,333 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:30:42,333 EPOCH 3 |
|
2024-07-29 09:31:28,557 batch 104/1044 - loss 4.09782187 - lr 0.0010 - time 46.22s |
|
2024-07-29 09:32:13,046 batch 208/1044 - loss 4.07343847 - lr 0.0010 - time 90.71s |
|
2024-07-29 09:33:01,218 batch 312/1044 - loss 4.06736985 - lr 0.0010 - time 138.89s |
|
2024-07-29 09:33:48,591 batch 416/1044 - loss 4.06640469 - lr 0.0010 - time 186.26s |
|
2024-07-29 09:34:34,291 batch 520/1044 - loss 4.05470543 - lr 0.0010 - time 231.96s |
|
2024-07-29 09:35:18,867 batch 624/1044 - loss 4.04913080 - lr 0.0010 - time 276.53s |
|
2024-07-29 09:36:05,310 batch 728/1044 - loss 4.03836787 - lr 0.0010 - time 322.98s |
|
2024-07-29 09:36:52,483 batch 832/1044 - loss 4.02993985 - lr 0.0010 - time 370.15s |
|
2024-07-29 09:37:45,596 batch 936/1044 - loss 4.02110605 - lr 0.0010 - time 423.26s |
|
2024-07-29 09:38:32,022 batch 1040/1044 - loss 4.01265615 - lr 0.0010 - time 469.69s |
|
2024-07-29 09:38:33,788 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:38:33,789 EPOCH 3 DONE |
|
2024-07-29 09:38:43,523 TRAIN Loss: 4.0125 |
|
2024-07-29 09:38:43,523 DEV Loss: 5.3448 |
|
2024-07-29 09:38:43,523 DEV Perplexity: 209.5177 |
|
2024-07-29 09:38:43,523 New best score! |
|
2024-07-29 09:38:43,525 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:38:43,525 EPOCH 4 |
|
2024-07-29 09:39:30,022 batch 104/1044 - loss 3.75320538 - lr 0.0010 - time 46.50s |
|
2024-07-29 09:40:13,650 batch 208/1044 - loss 3.75565803 - lr 0.0010 - time 90.13s |
|
2024-07-29 09:41:03,234 batch 312/1044 - loss 3.75648206 - lr 0.0010 - time 139.71s |
|
2024-07-29 09:41:48,285 batch 416/1044 - loss 3.75922136 - lr 0.0010 - time 184.76s |
|
2024-07-29 09:42:35,725 batch 520/1044 - loss 3.76481164 - lr 0.0010 - time 232.20s |
|
2024-07-29 09:43:23,771 batch 624/1044 - loss 3.76968772 - lr 0.0010 - time 280.25s |
|
2024-07-29 09:44:10,270 batch 728/1044 - loss 3.76599022 - lr 0.0010 - time 326.75s |
|
2024-07-29 09:44:53,931 batch 832/1044 - loss 3.76276653 - lr 0.0010 - time 370.41s |
|
2024-07-29 09:45:42,287 batch 936/1044 - loss 3.76076873 - lr 0.0010 - time 418.76s |
|
2024-07-29 09:46:28,290 batch 1040/1044 - loss 3.75842409 - lr 0.0010 - time 464.77s |
|
2024-07-29 09:46:29,970 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:46:29,971 EPOCH 4 DONE |
|
2024-07-29 09:46:39,744 TRAIN Loss: 3.7582 |
|
2024-07-29 09:46:39,744 DEV Loss: 5.4143 |
|
2024-07-29 09:46:39,744 DEV Perplexity: 224.5854 |
|
2024-07-29 09:46:39,744 No improvement for 1 epoch(s) |
|
2024-07-29 09:46:39,744 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:46:39,744 EPOCH 5 |
|
2024-07-29 09:47:25,640 batch 104/1044 - loss 3.56488176 - lr 0.0010 - time 45.90s |
|
2024-07-29 09:48:14,673 batch 208/1044 - loss 3.56000491 - lr 0.0010 - time 94.93s |
|
2024-07-29 09:48:58,050 batch 312/1044 - loss 3.57685460 - lr 0.0010 - time 138.31s |
|
2024-07-29 09:49:43,738 batch 416/1044 - loss 3.58028894 - lr 0.0010 - time 183.99s |
|
2024-07-29 09:50:31,676 batch 520/1044 - loss 3.57720232 - lr 0.0010 - time 231.93s |
|
2024-07-29 09:51:18,904 batch 624/1044 - loss 3.57581482 - lr 0.0010 - time 279.16s |
|
2024-07-29 09:52:03,833 batch 728/1044 - loss 3.57699984 - lr 0.0010 - time 324.09s |
|
2024-07-29 09:52:51,054 batch 832/1044 - loss 3.57911868 - lr 0.0010 - time 371.31s |
|
2024-07-29 09:53:37,238 batch 936/1044 - loss 3.57788490 - lr 0.0010 - time 417.49s |
|
2024-07-29 09:54:24,863 batch 1040/1044 - loss 3.57766181 - lr 0.0010 - time 465.12s |
|
2024-07-29 09:54:26,601 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:54:26,602 EPOCH 5 DONE |
|
2024-07-29 09:54:36,652 TRAIN Loss: 3.5769 |
|
2024-07-29 09:54:36,653 DEV Loss: 5.4411 |
|
2024-07-29 09:54:36,653 DEV Perplexity: 230.6889 |
|
2024-07-29 09:54:36,653 No improvement for 2 epoch(s) |
|
2024-07-29 09:54:36,653 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 09:54:36,653 EPOCH 6 |
|
2024-07-29 09:55:22,723 batch 104/1044 - loss 3.39798842 - lr 0.0010 - time 46.07s |
|
2024-07-29 09:56:07,779 batch 208/1044 - loss 3.43187746 - lr 0.0010 - time 91.13s |
|
2024-07-29 09:56:52,164 batch 312/1044 - loss 3.43835380 - lr 0.0010 - time 135.51s |
|
2024-07-29 09:57:36,906 batch 416/1044 - loss 3.43365484 - lr 0.0010 - time 180.25s |
|
2024-07-29 09:58:22,708 batch 520/1044 - loss 3.43188294 - lr 0.0010 - time 226.05s |
|
2024-07-29 09:59:10,056 batch 624/1044 - loss 3.43404266 - lr 0.0010 - time 273.40s |
|
2024-07-29 09:59:56,703 batch 728/1044 - loss 3.43836744 - lr 0.0010 - time 320.05s |
|
2024-07-29 10:00:44,029 batch 832/1044 - loss 3.43877697 - lr 0.0010 - time 367.38s |
|
2024-07-29 10:01:33,532 batch 936/1044 - loss 3.44485102 - lr 0.0010 - time 416.88s |
|
2024-07-29 10:02:20,897 batch 1040/1044 - loss 3.44382606 - lr 0.0010 - time 464.24s |
|
2024-07-29 10:02:22,705 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:02:22,707 EPOCH 6 DONE |
|
2024-07-29 10:02:32,569 TRAIN Loss: 3.4442 |
|
2024-07-29 10:02:32,570 DEV Loss: 5.2779 |
|
2024-07-29 10:02:32,570 DEV Perplexity: 195.9482 |
|
2024-07-29 10:02:32,570 New best score! |
|
2024-07-29 10:02:32,571 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:02:32,571 EPOCH 7 |
|
2024-07-29 10:03:19,357 batch 104/1044 - loss 3.29575888 - lr 0.0010 - time 46.79s |
|
2024-07-29 10:04:04,483 batch 208/1044 - loss 3.29152036 - lr 0.0010 - time 91.91s |
|
2024-07-29 10:04:51,913 batch 312/1044 - loss 3.28376382 - lr 0.0010 - time 139.34s |
|
2024-07-29 10:05:40,678 batch 416/1044 - loss 3.29208179 - lr 0.0010 - time 188.11s |
|
2024-07-29 10:06:24,489 batch 520/1044 - loss 3.29858603 - lr 0.0010 - time 231.92s |
|
2024-07-29 10:07:10,490 batch 624/1044 - loss 3.30361310 - lr 0.0010 - time 277.92s |
|
2024-07-29 10:07:54,971 batch 728/1044 - loss 3.31160711 - lr 0.0010 - time 322.40s |
|
2024-07-29 10:08:42,467 batch 832/1044 - loss 3.31577718 - lr 0.0010 - time 369.90s |
|
2024-07-29 10:09:31,183 batch 936/1044 - loss 3.32118058 - lr 0.0010 - time 418.61s |
|
2024-07-29 10:10:16,777 batch 1040/1044 - loss 3.32573676 - lr 0.0010 - time 464.21s |
|
2024-07-29 10:10:18,375 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:10:18,377 EPOCH 7 DONE |
|
2024-07-29 10:10:28,406 TRAIN Loss: 3.3257 |
|
2024-07-29 10:10:28,406 DEV Loss: 5.2686 |
|
2024-07-29 10:10:28,406 DEV Perplexity: 194.1387 |
|
2024-07-29 10:10:28,406 New best score! |
|
2024-07-29 10:10:28,407 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:10:28,407 EPOCH 8 |
|
2024-07-29 10:11:17,920 batch 104/1044 - loss 3.16426933 - lr 0.0010 - time 49.51s |
|
2024-07-29 10:12:02,554 batch 208/1044 - loss 3.20414807 - lr 0.0010 - time 94.15s |
|
2024-07-29 10:12:48,729 batch 312/1044 - loss 3.20626744 - lr 0.0010 - time 140.32s |
|
2024-07-29 10:13:35,177 batch 416/1044 - loss 3.21369808 - lr 0.0010 - time 186.77s |
|
2024-07-29 10:14:19,058 batch 520/1044 - loss 3.22363345 - lr 0.0010 - time 230.65s |
|
2024-07-29 10:15:05,021 batch 624/1044 - loss 3.22743286 - lr 0.0010 - time 276.61s |
|
2024-07-29 10:15:51,259 batch 728/1044 - loss 3.23338501 - lr 0.0010 - time 322.85s |
|
2024-07-29 10:16:37,192 batch 832/1044 - loss 3.24250125 - lr 0.0010 - time 368.78s |
|
2024-07-29 10:17:24,202 batch 936/1044 - loss 3.24898902 - lr 0.0010 - time 415.79s |
|
2024-07-29 10:18:12,656 batch 1040/1044 - loss 3.25282626 - lr 0.0010 - time 464.25s |
|
2024-07-29 10:18:14,655 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:18:14,657 EPOCH 8 DONE |
|
2024-07-29 10:18:24,423 TRAIN Loss: 3.2528 |
|
2024-07-29 10:18:24,423 DEV Loss: 5.4322 |
|
2024-07-29 10:18:24,423 DEV Perplexity: 228.6570 |
|
2024-07-29 10:18:24,423 No improvement for 1 epoch(s) |
|
2024-07-29 10:18:24,423 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:18:24,423 EPOCH 9 |
|
2024-07-29 10:19:11,257 batch 104/1044 - loss 3.11123835 - lr 0.0010 - time 46.83s |
|
2024-07-29 10:19:56,480 batch 208/1044 - loss 3.11838686 - lr 0.0010 - time 92.06s |
|
2024-07-29 10:20:41,453 batch 312/1044 - loss 3.12775021 - lr 0.0010 - time 137.03s |
|
2024-07-29 10:21:29,264 batch 416/1044 - loss 3.13006304 - lr 0.0010 - time 184.84s |
|
2024-07-29 10:22:13,410 batch 520/1044 - loss 3.13652410 - lr 0.0010 - time 228.99s |
|
2024-07-29 10:23:01,104 batch 624/1044 - loss 3.14098946 - lr 0.0010 - time 276.68s |
|
2024-07-29 10:23:49,984 batch 728/1044 - loss 3.15212496 - lr 0.0010 - time 325.56s |
|
2024-07-29 10:24:35,093 batch 832/1044 - loss 3.15861385 - lr 0.0010 - time 370.67s |
|
2024-07-29 10:25:22,475 batch 936/1044 - loss 3.16115363 - lr 0.0010 - time 418.05s |
|
2024-07-29 10:26:10,608 batch 1040/1044 - loss 3.16771574 - lr 0.0010 - time 466.18s |
|
2024-07-29 10:26:12,777 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:26:12,779 EPOCH 9 DONE |
|
2024-07-29 10:26:22,507 TRAIN Loss: 3.1674 |
|
2024-07-29 10:26:22,507 DEV Loss: 5.3844 |
|
2024-07-29 10:26:22,508 DEV Perplexity: 217.9797 |
|
2024-07-29 10:26:22,508 No improvement for 2 epoch(s) |
|
2024-07-29 10:26:22,508 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:26:22,508 EPOCH 10 |
|
2024-07-29 10:27:09,577 batch 104/1044 - loss 3.04956690 - lr 0.0010 - time 47.07s |
|
2024-07-29 10:27:55,687 batch 208/1044 - loss 3.05954124 - lr 0.0010 - time 93.18s |
|
2024-07-29 10:28:42,283 batch 312/1044 - loss 3.07472156 - lr 0.0010 - time 139.78s |
|
2024-07-29 10:29:28,041 batch 416/1044 - loss 3.08242494 - lr 0.0010 - time 185.53s |
|
2024-07-29 10:30:13,933 batch 520/1044 - loss 3.08639167 - lr 0.0010 - time 231.43s |
|
2024-07-29 10:31:03,928 batch 624/1044 - loss 3.09298112 - lr 0.0010 - time 281.42s |
|
2024-07-29 10:31:52,849 batch 728/1044 - loss 3.09874452 - lr 0.0010 - time 330.34s |
|
2024-07-29 10:32:38,018 batch 832/1044 - loss 3.10353403 - lr 0.0010 - time 375.51s |
|
2024-07-29 10:33:23,795 batch 936/1044 - loss 3.10650877 - lr 0.0010 - time 421.29s |
|
2024-07-29 10:34:09,619 batch 1040/1044 - loss 3.10740027 - lr 0.0010 - time 467.11s |
|
2024-07-29 10:34:11,922 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:34:11,923 EPOCH 10 DONE |
|
2024-07-29 10:34:21,779 TRAIN Loss: 3.1072 |
|
2024-07-29 10:34:21,780 DEV Loss: 5.3914 |
|
2024-07-29 10:34:21,780 DEV Perplexity: 219.5024 |
|
2024-07-29 10:34:21,780 No improvement for 3 epoch(s) |
|
2024-07-29 10:34:21,780 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 10:34:21,780 Finished Training |
|
2024-07-29 10:34:40,855 TEST Perplexity: 193.6740 |
|
2024-07-29 10:41:21,550 TEST BLEU = 32.52 86.7/62.7/29.3/7.0 (BP = 1.000 ratio = 1.000 hyp_len = 60 ref_len = 60) |
|
|