|
2024-07-30 05:52:44,411 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:52:44,411 Training Model |
|
2024-07-30 05:52:44,411 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:52:44,411 Translator( |
|
(encoder): EncoderLSTM( |
|
(embedding): Embedding(13968, 300, padding_idx=0) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(lstm): LSTM(300, 512, batch_first=True) |
|
) |
|
(decoder): DecoderLSTM( |
|
(embedding): Embedding(21119, 300, padding_idx=0) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(lstm): LSTM(300, 512, batch_first=True) |
|
(attention): DotProductAttention( |
|
(softmax): Softmax(dim=-1) |
|
(combined2hidden): Sequential( |
|
(0): Linear(in_features=1024, out_features=512, bias=True) |
|
(1): ReLU() |
|
) |
|
) |
|
(hidden2vocab): Linear(in_features=512, out_features=21119, bias=True) |
|
(log_softmax): LogSoftmax(dim=-1) |
|
) |
|
) |
|
2024-07-30 05:52:44,411 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:52:44,411 Training Hyperparameters: |
|
2024-07-30 05:52:44,411 - max_epochs: 10 |
|
2024-07-30 05:52:44,411 - learning_rate: 0.001 |
|
2024-07-30 05:52:44,411 - batch_size: 128 |
|
2024-07-30 05:52:44,411 - patience: 5 |
|
2024-07-30 05:52:44,411 - scheduler_patience: 3 |
|
2024-07-30 05:52:44,411 - teacher_forcing_ratio: 0.5 |
|
2024-07-30 05:52:44,411 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:52:44,411 Computational Parameters: |
|
2024-07-30 05:52:44,411 - num_workers: 4 |
|
2024-07-30 05:52:44,412 - device: device(type='cuda', index=0) |
|
2024-07-30 05:52:44,412 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:52:44,412 Dataset Splits: |
|
2024-07-30 05:52:44,412 - train: 129388 data points |
|
2024-07-30 05:52:44,412 - dev: 18485 data points |
|
2024-07-30 05:52:44,412 - test: 36969 data points |
|
2024-07-30 05:52:44,412 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:52:44,412 EPOCH 1 |
|
2024-07-30 05:53:01,839 batch 101/1011 - loss 6.65023891 - lr 0.0010 - time 17.43s |
|
2024-07-30 05:53:19,994 batch 202/1011 - loss 6.37670207 - lr 0.0010 - time 35.58s |
|
2024-07-30 05:53:37,722 batch 303/1011 - loss 6.18777885 - lr 0.0010 - time 53.31s |
|
2024-07-30 05:53:55,183 batch 404/1011 - loss 6.02016410 - lr 0.0010 - time 70.77s |
|
2024-07-30 05:54:12,971 batch 505/1011 - loss 5.87491408 - lr 0.0010 - time 88.56s |
|
2024-07-30 05:54:30,796 batch 606/1011 - loss 5.74571717 - lr 0.0010 - time 106.38s |
|
2024-07-30 05:54:48,498 batch 707/1011 - loss 5.62989615 - lr 0.0010 - time 124.09s |
|
2024-07-30 05:55:06,801 batch 808/1011 - loss 5.52785168 - lr 0.0010 - time 142.39s |
|
2024-07-30 05:55:24,580 batch 909/1011 - loss 5.43488396 - lr 0.0010 - time 160.17s |
|
2024-07-30 05:55:42,937 batch 1010/1011 - loss 5.35160901 - lr 0.0010 - time 178.53s |
|
2024-07-30 05:55:43,149 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:55:43,150 EPOCH 1 DONE |
|
2024-07-30 05:55:51,676 TRAIN Loss: 5.3508 |
|
2024-07-30 05:55:51,677 DEV Loss: 5.3985 |
|
2024-07-30 05:55:51,677 DEV Perplexity: 221.0851 |
|
2024-07-30 05:55:51,677 New best score! |
|
2024-07-30 05:55:51,678 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:55:51,678 EPOCH 2 |
|
2024-07-30 05:56:10,138 batch 101/1011 - loss 4.35387788 - lr 0.0010 - time 18.46s |
|
2024-07-30 05:56:27,943 batch 202/1011 - loss 4.31176025 - lr 0.0010 - time 36.26s |
|
2024-07-30 05:56:45,329 batch 303/1011 - loss 4.26554860 - lr 0.0010 - time 53.65s |
|
2024-07-30 05:57:03,548 batch 404/1011 - loss 4.22823117 - lr 0.0010 - time 71.87s |
|
2024-07-30 05:57:21,407 batch 505/1011 - loss 4.19638747 - lr 0.0010 - time 89.73s |
|
2024-07-30 05:57:40,047 batch 606/1011 - loss 4.16619647 - lr 0.0010 - time 108.37s |
|
2024-07-30 05:57:58,263 batch 707/1011 - loss 4.13296555 - lr 0.0010 - time 126.59s |
|
2024-07-30 05:58:16,175 batch 808/1011 - loss 4.10798012 - lr 0.0010 - time 144.50s |
|
2024-07-30 05:58:34,009 batch 909/1011 - loss 4.08410400 - lr 0.0010 - time 162.33s |
|
2024-07-30 05:58:51,761 batch 1010/1011 - loss 4.06205331 - lr 0.0010 - time 180.08s |
|
2024-07-30 05:58:51,991 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:58:51,992 EPOCH 2 DONE |
|
2024-07-30 05:59:00,286 TRAIN Loss: 4.0617 |
|
2024-07-30 05:59:00,286 DEV Loss: 5.3025 |
|
2024-07-30 05:59:00,286 DEV Perplexity: 200.8374 |
|
2024-07-30 05:59:00,286 New best score! |
|
2024-07-30 05:59:00,287 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 05:59:00,287 EPOCH 3 |
|
2024-07-30 05:59:18,575 batch 101/1011 - loss 3.63924071 - lr 0.0010 - time 18.29s |
|
2024-07-30 05:59:35,942 batch 202/1011 - loss 3.64374635 - lr 0.0010 - time 35.65s |
|
2024-07-30 05:59:53,930 batch 303/1011 - loss 3.62353413 - lr 0.0010 - time 53.64s |
|
2024-07-30 06:00:12,653 batch 404/1011 - loss 3.60985195 - lr 0.0010 - time 72.37s |
|
2024-07-30 06:00:31,232 batch 505/1011 - loss 3.60273010 - lr 0.0010 - time 90.95s |
|
2024-07-30 06:00:49,167 batch 606/1011 - loss 3.59089866 - lr 0.0010 - time 108.88s |
|
2024-07-30 06:01:07,550 batch 707/1011 - loss 3.58091725 - lr 0.0010 - time 127.26s |
|
2024-07-30 06:01:25,805 batch 808/1011 - loss 3.56992363 - lr 0.0010 - time 145.52s |
|
2024-07-30 06:01:43,461 batch 909/1011 - loss 3.56065750 - lr 0.0010 - time 163.17s |
|
2024-07-30 06:02:01,601 batch 1010/1011 - loss 3.55507214 - lr 0.0010 - time 181.31s |
|
2024-07-30 06:02:01,845 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:02:01,845 EPOCH 3 DONE |
|
2024-07-30 06:02:10,227 TRAIN Loss: 3.5548 |
|
2024-07-30 06:02:10,227 DEV Loss: 5.3260 |
|
2024-07-30 06:02:10,227 DEV Perplexity: 205.6116 |
|
2024-07-30 06:02:10,227 No improvement for 1 epoch(s) |
|
2024-07-30 06:02:10,227 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:02:10,227 EPOCH 4 |
|
2024-07-30 06:02:28,830 batch 101/1011 - loss 3.28445469 - lr 0.0010 - time 18.60s |
|
2024-07-30 06:02:46,477 batch 202/1011 - loss 3.28501337 - lr 0.0010 - time 36.25s |
|
2024-07-30 06:03:04,557 batch 303/1011 - loss 3.28228449 - lr 0.0010 - time 54.33s |
|
2024-07-30 06:03:22,554 batch 404/1011 - loss 3.28625823 - lr 0.0010 - time 72.33s |
|
2024-07-30 06:03:40,976 batch 505/1011 - loss 3.28577206 - lr 0.0010 - time 90.75s |
|
2024-07-30 06:03:59,242 batch 606/1011 - loss 3.27718717 - lr 0.0010 - time 109.02s |
|
2024-07-30 06:04:17,149 batch 707/1011 - loss 3.27785742 - lr 0.0010 - time 126.92s |
|
2024-07-30 06:04:35,362 batch 808/1011 - loss 3.27383622 - lr 0.0010 - time 145.13s |
|
2024-07-30 06:04:53,460 batch 909/1011 - loss 3.27473364 - lr 0.0010 - time 163.23s |
|
2024-07-30 06:05:11,650 batch 1010/1011 - loss 3.26900207 - lr 0.0010 - time 181.42s |
|
2024-07-30 06:05:11,891 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:05:11,892 EPOCH 4 DONE |
|
2024-07-30 06:05:20,185 TRAIN Loss: 3.2689 |
|
2024-07-30 06:05:20,185 DEV Loss: 5.0322 |
|
2024-07-30 06:05:20,185 DEV Perplexity: 153.2689 |
|
2024-07-30 06:05:20,185 New best score! |
|
2024-07-30 06:05:20,186 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:05:20,186 EPOCH 5 |
|
2024-07-30 06:05:38,378 batch 101/1011 - loss 3.04620511 - lr 0.0010 - time 18.19s |
|
2024-07-30 06:05:56,066 batch 202/1011 - loss 3.05621696 - lr 0.0010 - time 35.88s |
|
2024-07-30 06:06:14,397 batch 303/1011 - loss 3.07215565 - lr 0.0010 - time 54.21s |
|
2024-07-30 06:06:32,274 batch 404/1011 - loss 3.07633480 - lr 0.0010 - time 72.09s |
|
2024-07-30 06:06:50,327 batch 505/1011 - loss 3.08537655 - lr 0.0010 - time 90.14s |
|
2024-07-30 06:07:08,595 batch 606/1011 - loss 3.08699256 - lr 0.0010 - time 108.41s |
|
2024-07-30 06:07:26,268 batch 707/1011 - loss 3.09115884 - lr 0.0010 - time 126.08s |
|
2024-07-30 06:07:43,949 batch 808/1011 - loss 3.09043722 - lr 0.0010 - time 143.76s |
|
2024-07-30 06:08:02,577 batch 909/1011 - loss 3.09457381 - lr 0.0010 - time 162.39s |
|
2024-07-30 06:08:20,523 batch 1010/1011 - loss 3.09344615 - lr 0.0010 - time 180.34s |
|
2024-07-30 06:08:20,849 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:08:20,850 EPOCH 5 DONE |
|
2024-07-30 06:08:29,338 TRAIN Loss: 3.0932 |
|
2024-07-30 06:08:29,339 DEV Loss: 5.2149 |
|
2024-07-30 06:08:29,339 DEV Perplexity: 183.9882 |
|
2024-07-30 06:08:29,339 No improvement for 1 epoch(s) |
|
2024-07-30 06:08:29,339 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:08:29,339 EPOCH 6 |
|
2024-07-30 06:08:48,148 batch 101/1011 - loss 2.95294287 - lr 0.0010 - time 18.81s |
|
2024-07-30 06:09:05,811 batch 202/1011 - loss 2.94095918 - lr 0.0010 - time 36.47s |
|
2024-07-30 06:09:24,029 batch 303/1011 - loss 2.92945394 - lr 0.0010 - time 54.69s |
|
2024-07-30 06:09:41,027 batch 404/1011 - loss 2.93493413 - lr 0.0010 - time 71.69s |
|
2024-07-30 06:09:59,141 batch 505/1011 - loss 2.93981232 - lr 0.0010 - time 89.80s |
|
2024-07-30 06:10:17,688 batch 606/1011 - loss 2.94975308 - lr 0.0010 - time 108.35s |
|
2024-07-30 06:10:35,813 batch 707/1011 - loss 2.95301807 - lr 0.0010 - time 126.47s |
|
2024-07-30 06:10:53,277 batch 808/1011 - loss 2.95479195 - lr 0.0010 - time 143.94s |
|
2024-07-30 06:11:11,427 batch 909/1011 - loss 2.95849915 - lr 0.0010 - time 162.09s |
|
2024-07-30 06:11:29,521 batch 1010/1011 - loss 2.96108001 - lr 0.0010 - time 180.18s |
|
2024-07-30 06:11:29,783 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:11:29,784 EPOCH 6 DONE |
|
2024-07-30 06:11:38,034 TRAIN Loss: 2.9611 |
|
2024-07-30 06:11:38,034 DEV Loss: 5.2295 |
|
2024-07-30 06:11:38,034 DEV Perplexity: 186.7041 |
|
2024-07-30 06:11:38,034 No improvement for 2 epoch(s) |
|
2024-07-30 06:11:38,034 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:11:38,034 EPOCH 7 |
|
2024-07-30 06:11:56,963 batch 101/1011 - loss 2.83695398 - lr 0.0010 - time 18.93s |
|
2024-07-30 06:12:14,852 batch 202/1011 - loss 2.82333080 - lr 0.0010 - time 36.82s |
|
2024-07-30 06:12:33,156 batch 303/1011 - loss 2.82609393 - lr 0.0010 - time 55.12s |
|
2024-07-30 06:12:50,576 batch 404/1011 - loss 2.83502390 - lr 0.0010 - time 72.54s |
|
2024-07-30 06:13:08,639 batch 505/1011 - loss 2.84160646 - lr 0.0010 - time 90.60s |
|
2024-07-30 06:13:26,933 batch 606/1011 - loss 2.84488981 - lr 0.0010 - time 108.90s |
|
2024-07-30 06:13:45,357 batch 707/1011 - loss 2.84374654 - lr 0.0010 - time 127.32s |
|
2024-07-30 06:14:03,581 batch 808/1011 - loss 2.84697443 - lr 0.0010 - time 145.55s |
|
2024-07-30 06:14:21,037 batch 909/1011 - loss 2.84805540 - lr 0.0010 - time 163.00s |
|
2024-07-30 06:14:39,192 batch 1010/1011 - loss 2.85068582 - lr 0.0010 - time 181.16s |
|
2024-07-30 06:14:39,414 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:14:39,415 EPOCH 7 DONE |
|
2024-07-30 06:14:47,851 TRAIN Loss: 2.8508 |
|
2024-07-30 06:14:47,851 DEV Loss: 5.2494 |
|
2024-07-30 06:14:47,851 DEV Perplexity: 190.4522 |
|
2024-07-30 06:14:47,851 No improvement for 3 epoch(s) |
|
2024-07-30 06:14:47,851 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:14:47,851 EPOCH 8 |
|
2024-07-30 06:15:06,292 batch 101/1011 - loss 2.68063131 - lr 0.0010 - time 18.44s |
|
2024-07-30 06:15:24,465 batch 202/1011 - loss 2.70113396 - lr 0.0010 - time 36.61s |
|
2024-07-30 06:15:43,138 batch 303/1011 - loss 2.71602054 - lr 0.0010 - time 55.29s |
|
2024-07-30 06:16:01,098 batch 404/1011 - loss 2.73064416 - lr 0.0010 - time 73.25s |
|
2024-07-30 06:16:19,362 batch 505/1011 - loss 2.73879396 - lr 0.0010 - time 91.51s |
|
2024-07-30 06:16:37,807 batch 606/1011 - loss 2.74986825 - lr 0.0010 - time 109.96s |
|
2024-07-30 06:16:55,815 batch 707/1011 - loss 2.75368593 - lr 0.0010 - time 127.96s |
|
2024-07-30 06:17:13,486 batch 808/1011 - loss 2.75654979 - lr 0.0010 - time 145.64s |
|
2024-07-30 06:17:31,466 batch 909/1011 - loss 2.76380862 - lr 0.0010 - time 163.61s |
|
2024-07-30 06:17:49,300 batch 1010/1011 - loss 2.76961123 - lr 0.0010 - time 181.45s |
|
2024-07-30 06:17:49,575 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:17:49,576 EPOCH 8 DONE |
|
2024-07-30 06:17:57,990 TRAIN Loss: 2.7696 |
|
2024-07-30 06:17:57,990 DEV Loss: 5.0718 |
|
2024-07-30 06:17:57,990 DEV Perplexity: 159.4646 |
|
2024-07-30 06:17:57,991 No improvement for 4 epoch(s) |
|
2024-07-30 06:17:57,991 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:17:57,991 EPOCH 9 |
|
2024-07-30 06:18:16,347 batch 101/1011 - loss 2.62930829 - lr 0.0001 - time 18.36s |
|
2024-07-30 06:18:34,086 batch 202/1011 - loss 2.60304460 - lr 0.0001 - time 36.10s |
|
2024-07-30 06:18:51,978 batch 303/1011 - loss 2.59367224 - lr 0.0001 - time 53.99s |
|
2024-07-30 06:19:10,316 batch 404/1011 - loss 2.57648826 - lr 0.0001 - time 72.33s |
|
2024-07-30 06:19:27,848 batch 505/1011 - loss 2.56714471 - lr 0.0001 - time 89.86s |
|
2024-07-30 06:19:46,269 batch 606/1011 - loss 2.56959724 - lr 0.0001 - time 108.28s |
|
2024-07-30 06:20:04,011 batch 707/1011 - loss 2.56410600 - lr 0.0001 - time 126.02s |
|
2024-07-30 06:20:22,334 batch 808/1011 - loss 2.56042437 - lr 0.0001 - time 144.34s |
|
2024-07-30 06:20:40,444 batch 909/1011 - loss 2.55821582 - lr 0.0001 - time 162.45s |
|
2024-07-30 06:20:58,271 batch 1010/1011 - loss 2.55264133 - lr 0.0001 - time 180.28s |
|
2024-07-30 06:20:58,552 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:20:58,553 EPOCH 9 DONE |
|
2024-07-30 06:21:07,076 TRAIN Loss: 2.5523 |
|
2024-07-30 06:21:07,077 DEV Loss: 5.2075 |
|
2024-07-30 06:21:07,077 DEV Perplexity: 182.6331 |
|
2024-07-30 06:21:07,077 No improvement for 5 epoch(s) |
|
2024-07-30 06:21:07,077 Patience reached: Terminating model training due to early stopping |
|
2024-07-30 06:21:07,077 ---------------------------------------------------------------------------------------------------- |
|
2024-07-30 06:21:07,077 Finished Training |
|
2024-07-30 06:21:23,786 TEST Perplexity: 155.6613 |
|
2024-07-30 06:28:49,143 TEST BLEU = 20.09 95.7/64.4/11.4/2.3 (BP = 1.000 ratio = 1.000 hyp_len = 46 ref_len = 46) |
|
|