|
2024-07-30 05:37:15,468 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:37:15,468 Training Model |
|
2024-07-30 05:37:15,468 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:37:15,468 Translator( |
|
(encoder): EncoderLSTM( |
|
(embedding): Embedding(11860, 300, padding_idx=0) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(lstm): LSTM(300, 512, batch_first=True) |
|
) |
|
(decoder): DecoderLSTM( |
|
(embedding): Embedding(20803, 300, padding_idx=0) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(lstm): LSTM(300, 512, batch_first=True) |
|
(attention): DotProductAttention( |
|
(softmax): Softmax(dim=-1) |
|
(combined2hidden): Sequential( |
|
(0): Linear(in_features=1024, out_features=512, bias=True) |
|
(1): ReLU() |
|
) |
|
) |
|
(hidden2vocab): Linear(in_features=512, out_features=20803, bias=True) |
|
(log_softmax): LogSoftmax(dim=-1) |
|
) |
|
) |
|
2024-07-30 05:37:15,468 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:37:15,468 Training Hyperparameters: |
|
2024-07-30 05:37:15,468 - max_epochs: 10 |
|
2024-07-30 05:37:15,468 - learning_rate: 0.001 |
|
2024-07-30 05:37:15,468 - batch_size: 128 |
|
2024-07-30 05:37:15,468 - patience: 5 |
|
2024-07-30 05:37:15,468 - scheduler_patience: 3 |
|
2024-07-30 05:37:15,468 - teacher_forcing_ratio: 0.5 |
|
2024-07-30 05:37:15,468 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:37:15,468 Computational Parameters: |
|
2024-07-30 05:37:15,468 - num_workers: 4 |
|
2024-07-30 05:37:15,468 - device: device(type='cuda', index=0) |
|
2024-07-30 05:37:15,468 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:37:15,468 Dataset Splits: |
|
2024-07-30 05:37:15,468 - train: 85949 data points |
|
2024-07-30 05:37:15,468 - dev: 12279 data points |
|
2024-07-30 05:37:15,468 - test: 24557 data points |
|
2024-07-30 05:37:15,468 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:37:15,468 EPOCH 1 |
|
2024-07-30 05:37:32,927 batch 67/672 - loss 7.02975038 - lr 0.0010 - time 17.46s |
|
2024-07-30 05:37:47,949 batch 134/672 - loss 6.71117427 - lr 0.0010 - time 32.48s |
|
2024-07-30 05:38:02,964 batch 201/672 - loss 6.54099928 - lr 0.0010 - time 47.50s |
|
2024-07-30 05:38:17,283 batch 268/672 - loss 6.41611423 - lr 0.0010 - time 61.81s |
|
2024-07-30 05:38:31,966 batch 335/672 - loss 6.29867061 - lr 0.0010 - time 76.50s |
|
2024-07-30 05:38:46,988 batch 402/672 - loss 6.18939765 - lr 0.0010 - time 91.52s |
|
2024-07-30 05:39:01,806 batch 469/672 - loss 6.08893062 - lr 0.0010 - time 106.34s |
|
2024-07-30 05:39:16,615 batch 536/672 - loss 5.99670628 - lr 0.0010 - time 121.15s |
|
2024-07-30 05:39:32,715 batch 603/672 - loss 5.90835549 - lr 0.0010 - time 137.25s |
|
2024-07-30 05:39:48,372 batch 670/672 - loss 5.82820846 - lr 0.0010 - time 152.90s |
|
2024-07-30 05:39:48,928 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:39:48,929 EPOCH 1 DONE |
|
2024-07-30 05:39:55,582 TRAIN Loss: 5.8259 |
|
2024-07-30 05:39:55,583 DEV Loss: 5.9498 |
|
2024-07-30 05:39:55,583 DEV Perplexity: 383.6837 |
|
2024-07-30 05:39:55,583 New best score! |
|
2024-07-30 05:39:55,584 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:39:55,584 EPOCH 2 |
|
2024-07-30 05:40:10,237 batch 67/672 - loss 4.94811222 - lr 0.0010 - time 14.65s |
|
2024-07-30 05:40:24,957 batch 134/672 - loss 4.87957455 - lr 0.0010 - time 29.37s |
|
2024-07-30 05:40:39,341 batch 201/672 - loss 4.83254105 - lr 0.0010 - time 43.76s |
|
2024-07-30 05:40:53,204 batch 268/672 - loss 4.78420364 - lr 0.0010 - time 57.62s |
|
2024-07-30 05:41:09,735 batch 335/672 - loss 4.73921762 - lr 0.0010 - time 74.15s |
|
2024-07-30 05:41:25,335 batch 402/672 - loss 4.69684552 - lr 0.0010 - time 89.75s |
|
2024-07-30 05:41:39,835 batch 469/672 - loss 4.66227807 - lr 0.0010 - time 104.25s |
|
2024-07-30 05:41:54,143 batch 536/672 - loss 4.62496828 - lr 0.0010 - time 118.56s |
|
2024-07-30 05:42:11,248 batch 603/672 - loss 4.59447982 - lr 0.0010 - time 135.66s |
|
2024-07-30 05:42:27,703 batch 670/672 - loss 4.56273915 - lr 0.0010 - time 152.12s |
|
2024-07-30 05:42:28,145 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:42:28,146 EPOCH 2 DONE |
|
2024-07-30 05:42:34,823 TRAIN Loss: 4.5617 |
|
2024-07-30 05:42:34,823 DEV Loss: 5.6678 |
|
2024-07-30 05:42:34,824 DEV Perplexity: 289.3950 |
|
2024-07-30 05:42:34,824 New best score! |
|
2024-07-30 05:42:34,825 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:42:34,825 EPOCH 3 |
|
2024-07-30 05:42:50,156 batch 67/672 - loss 4.10132092 - lr 0.0010 - time 15.33s |
|
2024-07-30 05:43:04,641 batch 134/672 - loss 4.07855133 - lr 0.0010 - time 29.82s |
|
2024-07-30 05:43:20,810 batch 201/672 - loss 4.06460287 - lr 0.0010 - time 45.99s |
|
2024-07-30 05:43:35,663 batch 268/672 - loss 4.04538993 - lr 0.0010 - time 60.84s |
|
2024-07-30 05:43:51,597 batch 335/672 - loss 4.02722402 - lr 0.0010 - time 76.77s |
|
2024-07-30 05:44:06,421 batch 402/672 - loss 4.01842412 - lr 0.0010 - time 91.60s |
|
2024-07-30 05:44:22,885 batch 469/672 - loss 4.00276648 - lr 0.0010 - time 108.06s |
|
2024-07-30 05:44:38,368 batch 536/672 - loss 3.98641537 - lr 0.0010 - time 123.54s |
|
2024-07-30 05:44:52,112 batch 603/672 - loss 3.96747647 - lr 0.0010 - time 137.29s |
|
2024-07-30 05:45:06,111 batch 670/672 - loss 3.95417700 - lr 0.0010 - time 151.29s |
|
2024-07-30 05:45:06,622 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:45:06,623 EPOCH 3 DONE |
|
2024-07-30 05:45:13,296 TRAIN Loss: 3.9541 |
|
2024-07-30 05:45:13,297 DEV Loss: 5.5469 |
|
2024-07-30 05:45:13,297 DEV Perplexity: 256.4349 |
|
2024-07-30 05:45:13,297 New best score! |
|
2024-07-30 05:45:13,298 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:45:13,298 EPOCH 4 |
|
2024-07-30 05:45:27,530 batch 67/672 - loss 3.58314629 - lr 0.0010 - time 14.23s |
|
2024-07-30 05:45:42,047 batch 134/672 - loss 3.61880102 - lr 0.0010 - time 28.75s |
|
2024-07-30 05:45:57,236 batch 201/672 - loss 3.59876704 - lr 0.0010 - time 43.94s |
|
2024-07-30 05:46:12,211 batch 268/672 - loss 3.60037185 - lr 0.0010 - time 58.91s |
|
2024-07-30 05:46:26,663 batch 335/672 - loss 3.59520071 - lr 0.0010 - time 73.36s |
|
2024-07-30 05:46:42,085 batch 402/672 - loss 3.58989806 - lr 0.0010 - time 88.79s |
|
2024-07-30 05:46:57,170 batch 469/672 - loss 3.59024472 - lr 0.0010 - time 103.87s |
|
2024-07-30 05:47:13,003 batch 536/672 - loss 3.58583019 - lr 0.0010 - time 119.70s |
|
2024-07-30 05:47:28,724 batch 603/672 - loss 3.58285773 - lr 0.0010 - time 135.43s |
|
2024-07-30 05:47:44,206 batch 670/672 - loss 3.57629914 - lr 0.0010 - time 150.91s |
|
2024-07-30 05:47:44,707 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:47:44,708 EPOCH 4 DONE |
|
2024-07-30 05:47:51,341 TRAIN Loss: 3.5756 |
|
2024-07-30 05:47:51,342 DEV Loss: 5.5492 |
|
2024-07-30 05:47:51,342 DEV Perplexity: 257.0241 |
|
2024-07-30 05:47:51,342 No improvement for 1 epoch(s) |
|
2024-07-30 05:47:51,342 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:47:51,342 EPOCH 5 |
|
2024-07-30 05:48:08,249 batch 67/672 - loss 3.34088491 - lr 0.0010 - time 16.91s |
|
2024-07-30 05:48:22,070 batch 134/672 - loss 3.33345445 - lr 0.0010 - time 30.73s |
|
2024-07-30 05:48:37,068 batch 201/672 - loss 3.34338642 - lr 0.0010 - time 45.73s |
|
2024-07-30 05:48:52,643 batch 268/672 - loss 3.34294628 - lr 0.0010 - time 61.30s |
|
2024-07-30 05:49:07,513 batch 335/672 - loss 3.34124798 - lr 0.0010 - time 76.17s |
|
2024-07-30 05:49:21,720 batch 402/672 - loss 3.33298490 - lr 0.0010 - time 90.38s |
|
2024-07-30 05:49:36,527 batch 469/672 - loss 3.33775506 - lr 0.0010 - time 105.19s |
|
2024-07-30 05:49:51,453 batch 536/672 - loss 3.33724512 - lr 0.0010 - time 120.11s |
|
2024-07-30 05:50:06,222 batch 603/672 - loss 3.33896945 - lr 0.0010 - time 134.88s |
|
2024-07-30 05:50:22,129 batch 670/672 - loss 3.33729344 - lr 0.0010 - time 150.79s |
|
2024-07-30 05:50:22,681 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:50:22,682 EPOCH 5 DONE |
|
2024-07-30 05:50:29,337 TRAIN Loss: 3.3369 |
|
2024-07-30 05:50:29,338 DEV Loss: 5.5888 |
|
2024-07-30 05:50:29,338 DEV Perplexity: 267.4194 |
|
2024-07-30 05:50:29,338 No improvement for 2 epoch(s) |
|
2024-07-30 05:50:29,338 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:50:29,338 EPOCH 6 |
|
2024-07-30 05:50:44,538 batch 67/672 - loss 3.09988255 - lr 0.0010 - time 15.20s |
|
2024-07-30 05:51:00,031 batch 134/672 - loss 3.12499664 - lr 0.0010 - time 30.69s |
|
2024-07-30 05:51:14,763 batch 201/672 - loss 3.14307480 - lr 0.0010 - time 45.42s |
|
2024-07-30 05:51:30,017 batch 268/672 - loss 3.15283256 - lr 0.0010 - time 60.68s |
|
2024-07-30 05:51:45,540 batch 335/672 - loss 3.14810573 - lr 0.0010 - time 76.20s |
|
2024-07-30 05:52:00,299 batch 402/672 - loss 3.15746469 - lr 0.0010 - time 90.96s |
|
2024-07-30 05:52:15,529 batch 469/672 - loss 3.15836043 - lr 0.0010 - time 106.19s |
|
2024-07-30 05:52:31,790 batch 536/672 - loss 3.16262923 - lr 0.0010 - time 122.45s |
|
2024-07-30 05:52:47,277 batch 603/672 - loss 3.16002134 - lr 0.0010 - time 137.94s |
|
2024-07-30 05:53:01,751 batch 670/672 - loss 3.16646054 - lr 0.0010 - time 152.41s |
|
2024-07-30 05:53:02,395 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:53:02,396 EPOCH 6 DONE |
|
2024-07-30 05:53:09,115 TRAIN Loss: 3.1667 |
|
2024-07-30 05:53:09,115 DEV Loss: 5.5071 |
|
2024-07-30 05:53:09,115 DEV Perplexity: 246.4274 |
|
2024-07-30 05:53:09,115 New best score! |
|
2024-07-30 05:53:09,116 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:53:09,116 EPOCH 7 |
|
2024-07-30 05:53:24,807 batch 67/672 - loss 2.98739854 - lr 0.0010 - time 15.69s |
|
2024-07-30 05:53:39,149 batch 134/672 - loss 2.98036382 - lr 0.0010 - time 30.03s |
|
2024-07-30 05:53:53,482 batch 201/672 - loss 2.98568483 - lr 0.0010 - time 44.37s |
|
2024-07-30 05:54:08,551 batch 268/672 - loss 2.99329857 - lr 0.0010 - time 59.43s |
|
2024-07-30 05:54:22,699 batch 335/672 - loss 3.00473778 - lr 0.0010 - time 73.58s |
|
2024-07-30 05:54:37,180 batch 402/672 - loss 3.01970972 - lr 0.0010 - time 88.06s |
|
2024-07-30 05:54:54,515 batch 469/672 - loss 3.02301556 - lr 0.0010 - time 105.40s |
|
2024-07-30 05:55:10,416 batch 536/672 - loss 3.02170302 - lr 0.0010 - time 121.30s |
|
2024-07-30 05:55:26,104 batch 603/672 - loss 3.02361924 - lr 0.0010 - time 136.99s |
|
2024-07-30 05:55:42,134 batch 670/672 - loss 3.02671158 - lr 0.0010 - time 153.02s |
|
2024-07-30 05:55:42,550 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:55:42,550 EPOCH 7 DONE |
|
2024-07-30 05:55:49,358 TRAIN Loss: 3.0269 |
|
2024-07-30 05:55:49,359 DEV Loss: 5.6211 |
|
2024-07-30 05:55:49,359 DEV Perplexity: 276.2037 |
|
2024-07-30 05:55:49,359 No improvement for 1 epoch(s) |
|
2024-07-30 05:55:49,359 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:55:49,359 EPOCH 8 |
|
2024-07-30 05:56:06,200 batch 67/672 - loss 2.86453795 - lr 0.0010 - time 16.84s |
|
2024-07-30 05:56:21,805 batch 134/672 - loss 2.86024533 - lr 0.0010 - time 32.45s |
|
2024-07-30 05:56:36,757 batch 201/672 - loss 2.85630216 - lr 0.0010 - time 47.40s |
|
2024-07-30 05:56:52,468 batch 268/672 - loss 2.86597593 - lr 0.0010 - time 63.11s |
|
2024-07-30 05:57:07,027 batch 335/672 - loss 2.87845792 - lr 0.0010 - time 77.67s |
|
2024-07-30 05:57:21,279 batch 402/672 - loss 2.88857114 - lr 0.0010 - time 91.92s |
|
2024-07-30 05:57:37,150 batch 469/672 - loss 2.89169808 - lr 0.0010 - time 107.79s |
|
2024-07-30 05:57:52,295 batch 536/672 - loss 2.89775301 - lr 0.0010 - time 122.94s |
|
2024-07-30 05:58:07,862 batch 603/672 - loss 2.90178569 - lr 0.0010 - time 138.50s |
|
2024-07-30 05:58:22,653 batch 670/672 - loss 2.90217636 - lr 0.0010 - time 153.29s |
|
2024-07-30 05:58:23,114 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:58:23,115 EPOCH 8 DONE |
|
2024-07-30 05:58:29,762 TRAIN Loss: 2.9031 |
|
2024-07-30 05:58:29,762 DEV Loss: 5.6538 |
|
2024-07-30 05:58:29,762 DEV Perplexity: 285.3849 |
|
2024-07-30 05:58:29,762 No improvement for 2 epoch(s) |
|
2024-07-30 05:58:29,762 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:58:29,762 EPOCH 9 |
|
2024-07-30 05:58:44,994 batch 67/672 - loss 2.73835637 - lr 0.0010 - time 15.23s |
|
2024-07-30 05:59:00,180 batch 134/672 - loss 2.73880063 - lr 0.0010 - time 30.42s |
|
2024-07-30 05:59:15,963 batch 201/672 - loss 2.74995850 - lr 0.0010 - time 46.20s |
|
2024-07-30 05:59:31,437 batch 268/672 - loss 2.76165144 - lr 0.0010 - time 61.67s |
|
2024-07-30 05:59:46,416 batch 335/672 - loss 2.77234203 - lr 0.0010 - time 76.65s |
|
2024-07-30 06:00:01,072 batch 402/672 - loss 2.77924203 - lr 0.0010 - time 91.31s |
|
2024-07-30 06:00:15,766 batch 469/672 - loss 2.79409497 - lr 0.0010 - time 106.00s |
|
2024-07-30 06:00:31,180 batch 536/672 - loss 2.79967493 - lr 0.0010 - time 121.42s |
|
2024-07-30 06:00:46,193 batch 603/672 - loss 2.80317950 - lr 0.0010 - time 136.43s |
|
2024-07-30 06:01:02,490 batch 670/672 - loss 2.80855727 - lr 0.0010 - time 152.73s |
|
2024-07-30 06:01:02,875 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:01:02,876 EPOCH 9 DONE |
|
2024-07-30 06:01:09,698 TRAIN Loss: 2.8086 |
|
2024-07-30 06:01:09,698 DEV Loss: 5.6621 |
|
2024-07-30 06:01:09,698 DEV Perplexity: 287.7391 |
|
2024-07-30 06:01:09,698 No improvement for 3 epoch(s) |
|
2024-07-30 06:01:09,698 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:01:09,698 EPOCH 10 |
|
2024-07-30 06:01:24,618 batch 67/672 - loss 2.64406736 - lr 0.0010 - time 14.92s |
|
2024-07-30 06:01:40,065 batch 134/672 - loss 2.67658532 - lr 0.0010 - time 30.37s |
|
2024-07-30 06:01:54,965 batch 201/672 - loss 2.68379090 - lr 0.0010 - time 45.27s |
|
2024-07-30 06:02:09,870 batch 268/672 - loss 2.68974200 - lr 0.0010 - time 60.17s |
|
2024-07-30 06:02:24,367 batch 335/672 - loss 2.69738334 - lr 0.0010 - time 74.67s |
|
2024-07-30 06:02:39,352 batch 402/672 - loss 2.71200059 - lr 0.0010 - time 89.65s |
|
2024-07-30 06:02:55,514 batch 469/672 - loss 2.71962268 - lr 0.0010 - time 105.82s |
|
2024-07-30 06:03:11,123 batch 536/672 - loss 2.72209992 - lr 0.0010 - time 121.42s |
|
2024-07-30 06:03:27,930 batch 603/672 - loss 2.72870156 - lr 0.0010 - time 138.23s |
|
2024-07-30 06:03:42,785 batch 670/672 - loss 2.73530983 - lr 0.0010 - time 153.09s |
|
2024-07-30 06:03:43,269 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:03:43,270 EPOCH 10 DONE |
|
2024-07-30 06:03:49,979 TRAIN Loss: 2.7360 |
|
2024-07-30 06:03:49,980 DEV Loss: 5.7023 |
|
2024-07-30 06:03:49,980 DEV Perplexity: 299.5418 |
|
2024-07-30 06:03:49,980 No improvement for 4 epoch(s) |
|
2024-07-30 06:03:49,980 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:03:49,980 Finished Training |
|
2024-07-30 06:04:02,968 TEST Perplexity: 243.8509 |
|
2024-07-30 06:09:52,994 TEST BLEU = 51.71 95.2/73.2/40.0/25.6 (BP = 1.000 ratio = 1.000 hyp_len = 42 ref_len = 42) |
|
|