|
2024-07-29 04:42:24,085 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 04:42:24,085 Training Model |
|
2024-07-29 04:42:24,085 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 04:42:24,085 Translator( |
|
(encoder): EncoderLSTM( |
|
(embedding): Embedding(112, 300, padding_idx=0) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(lstm): LSTM(300, 512, batch_first=True) |
|
) |
|
(decoder): DecoderLSTM( |
|
(embedding): Embedding(114, 300, padding_idx=0) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
(lstm): LSTM(300, 512, batch_first=True) |
|
(attention): DotProductAttention( |
|
(softmax): Softmax(dim=-1) |
|
(combined2hidden): Sequential( |
|
(0): Linear(in_features=1024, out_features=512, bias=True) |
|
(1): ReLU() |
|
) |
|
) |
|
(hidden2vocab): Linear(in_features=512, out_features=114, bias=True) |
|
(log_softmax): LogSoftmax(dim=-1) |
|
) |
|
) |
|
2024-07-29 04:42:24,085 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 04:42:24,085 Training Hyperparameters: |
|
2024-07-29 04:42:24,086 - max_epochs: 10 |
|
2024-07-29 04:42:24,086 - learning_rate: 0.001 |
|
2024-07-29 04:42:24,086 - batch_size: 128 |
|
2024-07-29 04:42:24,086 - patience: 5 |
|
2024-07-29 04:42:24,086 - scheduler_patience: 3 |
|
2024-07-29 04:42:24,086 - teacher_forcing_ratio: 0.5 |
|
2024-07-29 04:42:24,086 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 04:42:24,086 Computational Parameters: |
|
2024-07-29 04:42:24,086 - num_workers: 4 |
|
2024-07-29 04:42:24,086 - device: device(type='cuda', index=0) |
|
2024-07-29 04:42:24,086 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 04:42:24,086 Dataset Splits: |
|
2024-07-29 04:42:24,086 - train: 133623 data points |
|
2024-07-29 04:42:24,086 - dev: 19090 data points |
|
2024-07-29 04:42:24,086 - test: 38179 data points |
|
2024-07-29 04:42:24,086 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 04:42:24,086 EPOCH 1 |
|
2024-07-29 04:45:28,740 batch 104/1044 - loss 2.83774002 - lr 0.0010 - time 184.65s |
|
2024-07-29 04:48:40,216 batch 208/1044 - loss 2.69157360 - lr 0.0010 - time 376.13s |
|
2024-07-29 04:51:43,099 batch 312/1044 - loss 2.61162177 - lr 0.0010 - time 559.01s |
|
2024-07-29 04:54:47,347 batch 416/1044 - loss 2.55821791 - lr 0.0010 - time 743.26s |
|
2024-07-29 04:57:41,820 batch 520/1044 - loss 2.51365572 - lr 0.0010 - time 917.73s |
|
2024-07-29 05:00:56,513 batch 624/1044 - loss 2.47733882 - lr 0.0010 - time 1112.43s |
|
2024-07-29 05:03:37,940 batch 728/1044 - loss 2.44389959 - lr 0.0010 - time 1273.85s |
|
2024-07-29 05:06:24,919 batch 832/1044 - loss 2.40961261 - lr 0.0010 - time 1440.83s |
|
2024-07-29 05:09:17,364 batch 936/1044 - loss 2.37903219 - lr 0.0010 - time 1613.28s |
|
2024-07-29 05:12:21,697 batch 1040/1044 - loss 2.34951652 - lr 0.0010 - time 1797.61s |
|
2024-07-29 05:12:26,875 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 05:12:26,878 EPOCH 1 DONE |
|
2024-07-29 05:13:13,524 TRAIN Loss: 2.3485 |
|
2024-07-29 05:13:13,524 DEV Loss: 3.6517 |
|
2024-07-29 05:13:13,524 DEV Perplexity: 38.5406 |
|
2024-07-29 05:13:13,524 New best score! |
|
2024-07-29 05:13:13,525 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 05:13:13,525 EPOCH 2 |
|
2024-07-29 05:16:13,790 batch 104/1044 - loss 2.03566457 - lr 0.0010 - time 180.26s |
|
2024-07-29 05:19:18,503 batch 208/1044 - loss 2.01975197 - lr 0.0010 - time 364.98s |
|
2024-07-29 05:22:27,296 batch 312/1044 - loss 2.01034658 - lr 0.0010 - time 553.77s |
|
2024-07-29 05:25:25,285 batch 416/1044 - loss 1.99740521 - lr 0.0010 - time 731.76s |
|
2024-07-29 05:28:20,014 batch 520/1044 - loss 1.98132378 - lr 0.0010 - time 906.49s |
|
2024-07-29 05:31:18,268 batch 624/1044 - loss 1.96808053 - lr 0.0010 - time 1084.74s |
|
2024-07-29 05:34:09,289 batch 728/1044 - loss 1.95846857 - lr 0.0010 - time 1255.76s |
|
2024-07-29 05:37:18,613 batch 832/1044 - loss 1.94876255 - lr 0.0010 - time 1445.09s |
|
2024-07-29 05:40:08,339 batch 936/1044 - loss 1.93996137 - lr 0.0010 - time 1614.81s |
|
2024-07-29 05:43:14,442 batch 1040/1044 - loss 1.93165519 - lr 0.0010 - time 1800.92s |
|
2024-07-29 05:43:22,242 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 05:43:22,245 EPOCH 2 DONE |
|
2024-07-29 05:44:08,478 TRAIN Loss: 1.9311 |
|
2024-07-29 05:44:08,478 DEV Loss: 3.8520 |
|
2024-07-29 05:44:08,478 DEV Perplexity: 47.0890 |
|
2024-07-29 05:44:08,478 No improvement for 1 epoch(s) |
|
2024-07-29 05:44:08,478 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 05:44:08,478 EPOCH 3 |
|
2024-07-29 05:47:08,374 batch 104/1044 - loss 1.82115972 - lr 0.0010 - time 179.90s |
|
2024-07-29 05:50:01,154 batch 208/1044 - loss 1.82096945 - lr 0.0010 - time 352.68s |
|
2024-07-29 05:52:50,432 batch 312/1044 - loss 1.81545555 - lr 0.0010 - time 521.95s |
|
2024-07-29 05:56:01,839 batch 416/1044 - loss 1.80929346 - lr 0.0010 - time 713.36s |
|
2024-07-29 05:59:06,754 batch 520/1044 - loss 1.80582210 - lr 0.0010 - time 898.28s |
|
2024-07-29 06:02:08,393 batch 624/1044 - loss 1.79901005 - lr 0.0010 - time 1079.91s |
|
2024-07-29 06:05:20,722 batch 728/1044 - loss 1.79334588 - lr 0.0010 - time 1272.24s |
|
2024-07-29 06:08:05,926 batch 832/1044 - loss 1.78740208 - lr 0.0010 - time 1437.45s |
|
2024-07-29 06:10:59,878 batch 936/1044 - loss 1.78273205 - lr 0.0010 - time 1611.40s |
|
2024-07-29 06:14:05,067 batch 1040/1044 - loss 1.77811699 - lr 0.0010 - time 1796.59s |
|
2024-07-29 06:14:11,499 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 06:14:11,502 EPOCH 3 DONE |
|
2024-07-29 06:14:57,787 TRAIN Loss: 1.7781 |
|
2024-07-29 06:14:57,787 DEV Loss: 3.8704 |
|
2024-07-29 06:14:57,787 DEV Perplexity: 47.9616 |
|
2024-07-29 06:14:57,787 No improvement for 2 epoch(s) |
|
2024-07-29 06:14:57,787 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 06:14:57,787 EPOCH 4 |
|
2024-07-29 06:17:47,582 batch 104/1044 - loss 1.70287426 - lr 0.0010 - time 169.79s |
|
2024-07-29 06:20:46,670 batch 208/1044 - loss 1.70862214 - lr 0.0010 - time 348.88s |
|
2024-07-29 06:23:36,046 batch 312/1044 - loss 1.71112499 - lr 0.0010 - time 518.26s |
|
2024-07-29 06:26:40,139 batch 416/1044 - loss 1.70913852 - lr 0.0010 - time 702.35s |
|
2024-07-29 06:29:35,197 batch 520/1044 - loss 1.70622180 - lr 0.0010 - time 877.41s |
|
2024-07-29 06:32:46,182 batch 624/1044 - loss 1.70036061 - lr 0.0010 - time 1068.40s |
|
2024-07-29 06:35:40,667 batch 728/1044 - loss 1.69717816 - lr 0.0010 - time 1242.88s |
|
2024-07-29 06:38:48,262 batch 832/1044 - loss 1.69264350 - lr 0.0010 - time 1430.47s |
|
2024-07-29 06:41:50,576 batch 936/1044 - loss 1.68914897 - lr 0.0010 - time 1612.79s |
|
2024-07-29 06:44:53,396 batch 1040/1044 - loss 1.68603338 - lr 0.0010 - time 1795.61s |
|
2024-07-29 06:45:02,616 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 06:45:02,618 EPOCH 4 DONE |
|
2024-07-29 06:45:48,992 TRAIN Loss: 1.6859 |
|
2024-07-29 06:45:48,992 DEV Loss: 4.0447 |
|
2024-07-29 06:45:48,992 DEV Perplexity: 57.0919 |
|
2024-07-29 06:45:48,992 No improvement for 3 epoch(s) |
|
2024-07-29 06:45:48,992 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 06:45:48,992 EPOCH 5 |
|
2024-07-29 06:48:45,115 batch 104/1044 - loss 1.63299357 - lr 0.0010 - time 176.12s |
|
2024-07-29 06:51:35,807 batch 208/1044 - loss 1.62950270 - lr 0.0010 - time 346.81s |
|
2024-07-29 06:54:46,072 batch 312/1044 - loss 1.63286690 - lr 0.0010 - time 537.08s |
|
2024-07-29 06:57:45,633 batch 416/1044 - loss 1.63218283 - lr 0.0010 - time 716.64s |
|
2024-07-29 07:00:43,309 batch 520/1044 - loss 1.63061902 - lr 0.0010 - time 894.32s |
|
2024-07-29 07:03:35,780 batch 624/1044 - loss 1.62754329 - lr 0.0010 - time 1066.79s |
|
2024-07-29 07:06:38,813 batch 728/1044 - loss 1.62501707 - lr 0.0010 - time 1249.82s |
|
2024-07-29 07:09:45,197 batch 832/1044 - loss 1.62138438 - lr 0.0010 - time 1436.20s |
|
2024-07-29 07:12:45,352 batch 936/1044 - loss 1.61940890 - lr 0.0010 - time 1616.36s |
|
2024-07-29 07:15:49,279 batch 1040/1044 - loss 1.61704726 - lr 0.0010 - time 1800.29s |
|
2024-07-29 07:15:54,552 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 07:15:54,554 EPOCH 5 DONE |
|
2024-07-29 07:16:40,924 TRAIN Loss: 1.6170 |
|
2024-07-29 07:16:40,924 DEV Loss: 4.1085 |
|
2024-07-29 07:16:40,924 DEV Perplexity: 60.8567 |
|
2024-07-29 07:16:40,924 No improvement for 4 epoch(s) |
|
2024-07-29 07:16:40,925 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 07:16:40,925 EPOCH 6 |
|
2024-07-29 07:19:49,195 batch 104/1044 - loss 1.56859132 - lr 0.0001 - time 188.27s |
|
2024-07-29 07:22:42,313 batch 208/1044 - loss 1.56012409 - lr 0.0001 - time 361.39s |
|
2024-07-29 07:25:52,599 batch 312/1044 - loss 1.55991665 - lr 0.0001 - time 551.67s |
|
2024-07-29 07:28:54,507 batch 416/1044 - loss 1.55604229 - lr 0.0001 - time 733.58s |
|
2024-07-29 07:31:52,384 batch 520/1044 - loss 1.55617112 - lr 0.0001 - time 911.46s |
|
2024-07-29 07:35:13,662 batch 624/1044 - loss 1.55762414 - lr 0.0001 - time 1112.74s |
|
2024-07-29 07:38:13,403 batch 728/1044 - loss 1.55602573 - lr 0.0001 - time 1292.48s |
|
2024-07-29 07:41:07,426 batch 832/1044 - loss 1.55367613 - lr 0.0001 - time 1466.50s |
|
2024-07-29 07:43:47,779 batch 936/1044 - loss 1.55211146 - lr 0.0001 - time 1626.85s |
|
2024-07-29 07:46:37,945 batch 1040/1044 - loss 1.55028499 - lr 0.0001 - time 1797.02s |
|
2024-07-29 07:46:44,673 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 07:46:44,675 EPOCH 6 DONE |
|
2024-07-29 07:47:30,994 TRAIN Loss: 1.5500 |
|
2024-07-29 07:47:30,994 DEV Loss: 4.2097 |
|
2024-07-29 07:47:30,994 DEV Perplexity: 67.3339 |
|
2024-07-29 07:47:30,994 No improvement for 5 epoch(s) |
|
2024-07-29 07:47:30,994 Patience reached: Terminating model training due to early stopping |
|
2024-07-29 07:47:30,994 ---------------------------------------------------------------------------------------------------- |
|
2024-07-29 07:47:30,994 Finished Training |
|
2024-07-29 07:49:01,240 TEST Perplexity: 38.3937 |
|
2024-07-29 08:00:07,776 TEST BLEU = 2.84 44.3/10.4/0.5/0.3 (BP = 1.000 ratio = 1.000 hyp_len = 97 ref_len = 97) |
|
|