Add swedish models
Browse files- models/en2sv/character_end2end_embeddings_with_attention/log.txt +154 -0
- models/en2sv/character_end2end_embeddings_with_attention/model.pt +3 -0
- models/en2sv/character_end2end_embeddings_without_attention/log.txt +147 -0
- models/en2sv/character_end2end_embeddings_without_attention/model.pt +3 -0
- models/en2sv/word_end2end_embeddings_with_attention/log.txt +208 -0
- models/en2sv/word_end2end_embeddings_with_attention/model.pt +3 -0
- models/en2sv/word_end2end_embeddings_without_attention/log.txt +218 -0
- models/en2sv/word_end2end_embeddings_without_attention/model.pt +3 -0
- models/en2sv/word_word2vec_embeddings_with_attention/log.txt +225 -0
- models/en2sv/word_word2vec_embeddings_with_attention/model.pt +3 -0
- models/en2sv/word_word2vec_embeddings_without_attention/log.txt +201 -0
- models/en2sv/word_word2vec_embeddings_without_attention/model.pt +3 -0
models/en2sv/character_end2end_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-30 02:17:46,750 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-30 02:17:46,750 Training Model
|
3 |
+
2024-07-30 02:17:46,750 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-30 02:17:46,750 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(111, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(105, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=105, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-30 02:17:46,750 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-30 02:17:46,750 Training Hyperparameters:
|
27 |
+
2024-07-30 02:17:46,750 - max_epochs: 10
|
28 |
+
2024-07-30 02:17:46,750 - learning_rate: 0.001
|
29 |
+
2024-07-30 02:17:46,750 - batch_size: 128
|
30 |
+
2024-07-30 02:17:46,750 - patience: 5
|
31 |
+
2024-07-30 02:17:46,750 - scheduler_patience: 3
|
32 |
+
2024-07-30 02:17:46,750 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-30 02:17:46,750 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-30 02:17:46,750 Computational Parameters:
|
35 |
+
2024-07-30 02:17:46,750 - num_workers: 4
|
36 |
+
2024-07-30 02:17:46,750 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-30 02:17:46,750 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-30 02:17:46,750 Dataset Splits:
|
39 |
+
2024-07-30 02:17:46,751 - train: 129388 data points
|
40 |
+
2024-07-30 02:17:46,751 - dev: 18485 data points
|
41 |
+
2024-07-30 02:17:46,751 - test: 36969 data points
|
42 |
+
2024-07-30 02:17:46,751 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-30 02:17:46,751 EPOCH 1
|
44 |
+
2024-07-30 02:20:14,584 batch 101/1011 - loss 2.86735094 - lr 0.0010 - time 147.83s
|
45 |
+
2024-07-30 02:22:39,297 batch 202/1011 - loss 2.72946281 - lr 0.0010 - time 292.55s
|
46 |
+
2024-07-30 02:24:56,481 batch 303/1011 - loss 2.65172425 - lr 0.0010 - time 429.73s
|
47 |
+
2024-07-30 02:27:17,762 batch 404/1011 - loss 2.60293996 - lr 0.0010 - time 571.01s
|
48 |
+
2024-07-30 02:29:41,974 batch 505/1011 - loss 2.56301742 - lr 0.0010 - time 715.22s
|
49 |
+
2024-07-30 02:32:09,632 batch 606/1011 - loss 2.52287651 - lr 0.0010 - time 862.88s
|
50 |
+
2024-07-30 02:34:33,931 batch 707/1011 - loss 2.47866768 - lr 0.0010 - time 1007.18s
|
51 |
+
2024-07-30 02:37:03,416 batch 808/1011 - loss 2.44011894 - lr 0.0010 - time 1156.66s
|
52 |
+
2024-07-30 02:39:22,440 batch 909/1011 - loss 2.40451258 - lr 0.0010 - time 1295.69s
|
53 |
+
2024-07-30 02:41:41,408 batch 1010/1011 - loss 2.37034330 - lr 0.0010 - time 1434.66s
|
54 |
+
2024-07-30 02:41:42,788 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-30 02:41:42,790 EPOCH 1 DONE
|
56 |
+
2024-07-30 02:42:20,287 TRAIN Loss: 2.3699
|
57 |
+
2024-07-30 02:42:20,288 DEV Loss: 3.7084
|
58 |
+
2024-07-30 02:42:20,288 DEV Perplexity: 40.7891
|
59 |
+
2024-07-30 02:42:20,288 New best score!
|
60 |
+
2024-07-30 02:42:20,290 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-30 02:42:20,290 EPOCH 2
|
62 |
+
2024-07-30 02:44:47,233 batch 101/1011 - loss 2.00985493 - lr 0.0010 - time 146.94s
|
63 |
+
2024-07-30 02:47:17,648 batch 202/1011 - loss 1.99881361 - lr 0.0010 - time 297.36s
|
64 |
+
2024-07-30 02:49:36,884 batch 303/1011 - loss 1.98625110 - lr 0.0010 - time 436.59s
|
65 |
+
2024-07-30 02:52:05,471 batch 404/1011 - loss 1.97792626 - lr 0.0010 - time 585.18s
|
66 |
+
2024-07-30 02:54:23,284 batch 505/1011 - loss 1.96699081 - lr 0.0010 - time 722.99s
|
67 |
+
2024-07-30 02:56:42,486 batch 606/1011 - loss 1.95183234 - lr 0.0010 - time 862.20s
|
68 |
+
2024-07-30 02:59:04,204 batch 707/1011 - loss 1.94068404 - lr 0.0010 - time 1003.91s
|
69 |
+
2024-07-30 03:01:31,559 batch 808/1011 - loss 1.93031463 - lr 0.0010 - time 1151.27s
|
70 |
+
2024-07-30 03:03:52,584 batch 909/1011 - loss 1.91933983 - lr 0.0010 - time 1292.29s
|
71 |
+
2024-07-30 03:06:14,554 batch 1010/1011 - loss 1.90792970 - lr 0.0010 - time 1434.26s
|
72 |
+
2024-07-30 03:06:15,868 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-30 03:06:15,870 EPOCH 2 DONE
|
74 |
+
2024-07-30 03:06:53,487 TRAIN Loss: 1.9079
|
75 |
+
2024-07-30 03:06:53,488 DEV Loss: 4.0378
|
76 |
+
2024-07-30 03:06:53,488 DEV Perplexity: 56.7028
|
77 |
+
2024-07-30 03:06:53,488 No improvement for 1 epoch(s)
|
78 |
+
2024-07-30 03:06:53,488 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-30 03:06:53,489 EPOCH 3
|
80 |
+
2024-07-30 03:09:15,463 batch 101/1011 - loss 1.78907573 - lr 0.0010 - time 141.97s
|
81 |
+
2024-07-30 03:11:42,195 batch 202/1011 - loss 1.78422776 - lr 0.0010 - time 288.71s
|
82 |
+
2024-07-30 03:13:59,221 batch 303/1011 - loss 1.77906499 - lr 0.0010 - time 425.73s
|
83 |
+
2024-07-30 03:16:16,933 batch 404/1011 - loss 1.77259262 - lr 0.0010 - time 563.44s
|
84 |
+
2024-07-30 03:18:33,183 batch 505/1011 - loss 1.76395207 - lr 0.0010 - time 699.69s
|
85 |
+
2024-07-30 03:20:56,446 batch 606/1011 - loss 1.75870391 - lr 0.0010 - time 842.96s
|
86 |
+
2024-07-30 03:23:22,609 batch 707/1011 - loss 1.75321817 - lr 0.0010 - time 989.12s
|
87 |
+
2024-07-30 03:25:54,166 batch 808/1011 - loss 1.74617685 - lr 0.0010 - time 1140.68s
|
88 |
+
2024-07-30 03:28:20,633 batch 909/1011 - loss 1.74084473 - lr 0.0010 - time 1287.14s
|
89 |
+
2024-07-30 03:30:42,444 batch 1010/1011 - loss 1.73547362 - lr 0.0010 - time 1428.96s
|
90 |
+
2024-07-30 03:30:43,412 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-30 03:30:43,414 EPOCH 3 DONE
|
92 |
+
2024-07-30 03:31:20,957 TRAIN Loss: 1.7354
|
93 |
+
2024-07-30 03:31:20,958 DEV Loss: 4.1249
|
94 |
+
2024-07-30 03:31:20,958 DEV Perplexity: 61.8625
|
95 |
+
2024-07-30 03:31:20,958 No improvement for 2 epoch(s)
|
96 |
+
2024-07-30 03:31:20,958 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-30 03:31:20,958 EPOCH 4
|
98 |
+
2024-07-30 03:33:36,147 batch 101/1011 - loss 1.66521794 - lr 0.0010 - time 135.19s
|
99 |
+
2024-07-30 03:35:55,583 batch 202/1011 - loss 1.66554682 - lr 0.0010 - time 274.62s
|
100 |
+
2024-07-30 03:38:27,233 batch 303/1011 - loss 1.65796713 - lr 0.0010 - time 426.28s
|
101 |
+
2024-07-30 03:40:44,185 batch 404/1011 - loss 1.65309123 - lr 0.0010 - time 563.23s
|
102 |
+
2024-07-30 03:43:11,092 batch 505/1011 - loss 1.64910596 - lr 0.0010 - time 710.13s
|
103 |
+
2024-07-30 03:45:38,169 batch 606/1011 - loss 1.64491277 - lr 0.0010 - time 857.21s
|
104 |
+
2024-07-30 03:48:03,029 batch 707/1011 - loss 1.64139012 - lr 0.0010 - time 1002.07s
|
105 |
+
2024-07-30 03:50:23,760 batch 808/1011 - loss 1.63702920 - lr 0.0010 - time 1142.80s
|
106 |
+
2024-07-30 03:52:50,807 batch 909/1011 - loss 1.63416369 - lr 0.0010 - time 1289.85s
|
107 |
+
2024-07-30 03:55:07,385 batch 1010/1011 - loss 1.63085939 - lr 0.0010 - time 1426.43s
|
108 |
+
2024-07-30 03:55:08,515 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-30 03:55:08,516 EPOCH 4 DONE
|
110 |
+
2024-07-30 03:55:46,651 TRAIN Loss: 1.6309
|
111 |
+
2024-07-30 03:55:46,652 DEV Loss: 4.2698
|
112 |
+
2024-07-30 03:55:46,652 DEV Perplexity: 71.5087
|
113 |
+
2024-07-30 03:55:46,652 No improvement for 3 epoch(s)
|
114 |
+
2024-07-30 03:55:46,652 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-30 03:55:46,652 EPOCH 5
|
116 |
+
2024-07-30 03:58:14,943 batch 101/1011 - loss 1.57273971 - lr 0.0010 - time 148.29s
|
117 |
+
2024-07-30 04:00:48,395 batch 202/1011 - loss 1.57111556 - lr 0.0010 - time 301.74s
|
118 |
+
2024-07-30 04:03:11,425 batch 303/1011 - loss 1.57657209 - lr 0.0010 - time 444.77s
|
119 |
+
2024-07-30 04:05:35,078 batch 404/1011 - loss 1.57244594 - lr 0.0010 - time 588.43s
|
120 |
+
2024-07-30 04:07:57,959 batch 505/1011 - loss 1.57071598 - lr 0.0010 - time 731.31s
|
121 |
+
2024-07-30 04:10:15,011 batch 606/1011 - loss 1.56758577 - lr 0.0010 - time 868.36s
|
122 |
+
2024-07-30 04:12:35,064 batch 707/1011 - loss 1.56390217 - lr 0.0010 - time 1008.41s
|
123 |
+
2024-07-30 04:14:53,925 batch 808/1011 - loss 1.56026725 - lr 0.0010 - time 1147.27s
|
124 |
+
2024-07-30 04:17:11,220 batch 909/1011 - loss 1.55733682 - lr 0.0010 - time 1284.57s
|
125 |
+
2024-07-30 04:19:37,188 batch 1010/1011 - loss 1.55493684 - lr 0.0010 - time 1430.54s
|
126 |
+
2024-07-30 04:19:38,665 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-30 04:19:38,667 EPOCH 5 DONE
|
128 |
+
2024-07-30 04:20:16,279 TRAIN Loss: 1.5550
|
129 |
+
2024-07-30 04:20:16,279 DEV Loss: 4.2535
|
130 |
+
2024-07-30 04:20:16,279 DEV Perplexity: 70.3542
|
131 |
+
2024-07-30 04:20:16,279 No improvement for 4 epoch(s)
|
132 |
+
2024-07-30 04:20:16,279 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-30 04:20:16,279 EPOCH 6
|
134 |
+
2024-07-30 04:22:41,790 batch 101/1011 - loss 1.48926209 - lr 0.0001 - time 145.51s
|
135 |
+
2024-07-30 04:25:08,198 batch 202/1011 - loss 1.49229986 - lr 0.0001 - time 291.92s
|
136 |
+
2024-07-30 04:27:29,248 batch 303/1011 - loss 1.49066265 - lr 0.0001 - time 432.97s
|
137 |
+
2024-07-30 04:29:59,135 batch 404/1011 - loss 1.48735474 - lr 0.0001 - time 582.86s
|
138 |
+
2024-07-30 04:32:13,744 batch 505/1011 - loss 1.48638164 - lr 0.0001 - time 717.47s
|
139 |
+
2024-07-30 04:34:44,208 batch 606/1011 - loss 1.48563741 - lr 0.0001 - time 867.93s
|
140 |
+
2024-07-30 04:37:02,924 batch 707/1011 - loss 1.48429131 - lr 0.0001 - time 1006.64s
|
141 |
+
2024-07-30 04:39:29,286 batch 808/1011 - loss 1.48379995 - lr 0.0001 - time 1153.01s
|
142 |
+
2024-07-30 04:41:45,546 batch 909/1011 - loss 1.48132304 - lr 0.0001 - time 1289.27s
|
143 |
+
2024-07-30 04:44:12,255 batch 1010/1011 - loss 1.48057979 - lr 0.0001 - time 1435.98s
|
144 |
+
2024-07-30 04:44:13,677 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-30 04:44:13,680 EPOCH 6 DONE
|
146 |
+
2024-07-30 04:44:51,213 TRAIN Loss: 1.4806
|
147 |
+
2024-07-30 04:44:51,214 DEV Loss: 4.3550
|
148 |
+
2024-07-30 04:44:51,214 DEV Perplexity: 77.8631
|
149 |
+
2024-07-30 04:44:51,214 No improvement for 5 epoch(s)
|
150 |
+
2024-07-30 04:44:51,214 Patience reached: Terminating model training due to early stopping
|
151 |
+
2024-07-30 04:44:51,214 ----------------------------------------------------------------------------------------------------
|
152 |
+
2024-07-30 04:44:51,214 Finished Training
|
153 |
+
2024-07-30 04:46:11,048 TEST Perplexity: 40.8583
|
154 |
+
2024-07-30 04:56:51,808 TEST BLEU = 5.88 51.6/16.4/1.7/0.8 (BP = 1.000 ratio = 1.000 hyp_len = 62 ref_len = 62)
|
models/en2sv/character_end2end_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c620f8a0768fdea31b5107d548e17840be820590934eebd1fa3af0ef9f279af9
|
3 |
+
size 15919272
|
models/en2sv/character_end2end_embeddings_without_attention/log.txt
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-30 04:57:02,129 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-30 04:57:02,129 Training Model
|
3 |
+
2024-07-30 04:57:02,129 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-30 04:57:02,129 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(111, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(105, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 1024, batch_first=True)
|
14 |
+
(hidden2vocab): Linear(in_features=1024, out_features=105, bias=True)
|
15 |
+
(log_softmax): LogSoftmax(dim=-1)
|
16 |
+
)
|
17 |
+
)
|
18 |
+
2024-07-30 04:57:02,129 ----------------------------------------------------------------------------------------------------
|
19 |
+
2024-07-30 04:57:02,129 Training Hyperparameters:
|
20 |
+
2024-07-30 04:57:02,129 - max_epochs: 10
|
21 |
+
2024-07-30 04:57:02,129 - learning_rate: 0.001
|
22 |
+
2024-07-30 04:57:02,129 - batch_size: 128
|
23 |
+
2024-07-30 04:57:02,129 - patience: 5
|
24 |
+
2024-07-30 04:57:02,129 - scheduler_patience: 3
|
25 |
+
2024-07-30 04:57:02,129 - teacher_forcing_ratio: 0.5
|
26 |
+
2024-07-30 04:57:02,129 ----------------------------------------------------------------------------------------------------
|
27 |
+
2024-07-30 04:57:02,129 Computational Parameters:
|
28 |
+
2024-07-30 04:57:02,129 - num_workers: 4
|
29 |
+
2024-07-30 04:57:02,129 - device: device(type='cuda', index=0)
|
30 |
+
2024-07-30 04:57:02,129 ----------------------------------------------------------------------------------------------------
|
31 |
+
2024-07-30 04:57:02,129 Dataset Splits:
|
32 |
+
2024-07-30 04:57:02,129 - train: 129388 data points
|
33 |
+
2024-07-30 04:57:02,129 - dev: 18485 data points
|
34 |
+
2024-07-30 04:57:02,129 - test: 36969 data points
|
35 |
+
2024-07-30 04:57:02,129 ----------------------------------------------------------------------------------------------------
|
36 |
+
2024-07-30 04:57:02,129 EPOCH 1
|
37 |
+
2024-07-30 04:57:45,341 batch 101/1011 - loss 2.82079955 - lr 0.0010 - time 43.21s
|
38 |
+
2024-07-30 04:58:27,310 batch 202/1011 - loss 2.69378598 - lr 0.0010 - time 85.18s
|
39 |
+
2024-07-30 04:59:08,866 batch 303/1011 - loss 2.62132871 - lr 0.0010 - time 126.74s
|
40 |
+
2024-07-30 04:59:51,100 batch 404/1011 - loss 2.57552936 - lr 0.0010 - time 168.97s
|
41 |
+
2024-07-30 05:00:34,295 batch 505/1011 - loss 2.53784329 - lr 0.0010 - time 212.17s
|
42 |
+
2024-07-30 05:01:16,512 batch 606/1011 - loss 2.50643325 - lr 0.0010 - time 254.38s
|
43 |
+
2024-07-30 05:01:57,974 batch 707/1011 - loss 2.48143017 - lr 0.0010 - time 295.84s
|
44 |
+
2024-07-30 05:02:39,476 batch 808/1011 - loss 2.46100130 - lr 0.0010 - time 337.35s
|
45 |
+
2024-07-30 05:03:21,639 batch 909/1011 - loss 2.44204216 - lr 0.0010 - time 379.51s
|
46 |
+
2024-07-30 05:04:03,778 batch 1010/1011 - loss 2.42566051 - lr 0.0010 - time 421.65s
|
47 |
+
2024-07-30 05:04:04,420 ----------------------------------------------------------------------------------------------------
|
48 |
+
2024-07-30 05:04:04,422 EPOCH 1 DONE
|
49 |
+
2024-07-30 05:04:26,639 TRAIN Loss: 2.4255
|
50 |
+
2024-07-30 05:04:26,639 DEV Loss: 3.1692
|
51 |
+
2024-07-30 05:04:26,639 DEV Perplexity: 23.7874
|
52 |
+
2024-07-30 05:04:26,640 New best score!
|
53 |
+
2024-07-30 05:04:26,641 ----------------------------------------------------------------------------------------------------
|
54 |
+
2024-07-30 05:04:26,641 EPOCH 2
|
55 |
+
2024-07-30 05:05:09,309 batch 101/1011 - loss 2.25588560 - lr 0.0010 - time 42.67s
|
56 |
+
2024-07-30 05:05:51,337 batch 202/1011 - loss 2.25245563 - lr 0.0010 - time 84.70s
|
57 |
+
2024-07-30 05:06:33,509 batch 303/1011 - loss 2.24505461 - lr 0.0010 - time 126.87s
|
58 |
+
2024-07-30 05:07:16,564 batch 404/1011 - loss 2.24304425 - lr 0.0010 - time 169.92s
|
59 |
+
2024-07-30 05:07:58,544 batch 505/1011 - loss 2.24009475 - lr 0.0010 - time 211.90s
|
60 |
+
2024-07-30 05:08:40,755 batch 606/1011 - loss 2.23375209 - lr 0.0010 - time 254.11s
|
61 |
+
2024-07-30 05:09:23,303 batch 707/1011 - loss 2.23068869 - lr 0.0010 - time 296.66s
|
62 |
+
2024-07-30 05:10:04,337 batch 808/1011 - loss 2.22755016 - lr 0.0010 - time 337.70s
|
63 |
+
2024-07-30 05:10:45,438 batch 909/1011 - loss 2.22347232 - lr 0.0010 - time 378.80s
|
64 |
+
2024-07-30 05:11:28,051 batch 1010/1011 - loss 2.21967396 - lr 0.0010 - time 421.41s
|
65 |
+
2024-07-30 05:11:28,574 ----------------------------------------------------------------------------------------------------
|
66 |
+
2024-07-30 05:11:28,576 EPOCH 2 DONE
|
67 |
+
2024-07-30 05:11:50,909 TRAIN Loss: 2.2196
|
68 |
+
2024-07-30 05:11:50,910 DEV Loss: 3.2402
|
69 |
+
2024-07-30 05:11:50,910 DEV Perplexity: 25.5396
|
70 |
+
2024-07-30 05:11:50,910 No improvement for 1 epoch(s)
|
71 |
+
2024-07-30 05:11:50,910 ----------------------------------------------------------------------------------------------------
|
72 |
+
2024-07-30 05:11:50,910 EPOCH 3
|
73 |
+
2024-07-30 05:12:32,461 batch 101/1011 - loss 2.16919004 - lr 0.0010 - time 41.55s
|
74 |
+
2024-07-30 05:13:15,711 batch 202/1011 - loss 2.16290541 - lr 0.0010 - time 84.80s
|
75 |
+
2024-07-30 05:13:56,872 batch 303/1011 - loss 2.16205912 - lr 0.0010 - time 125.96s
|
76 |
+
2024-07-30 05:14:39,969 batch 404/1011 - loss 2.16153205 - lr 0.0010 - time 169.06s
|
77 |
+
2024-07-30 05:15:22,839 batch 505/1011 - loss 2.15689037 - lr 0.0010 - time 211.93s
|
78 |
+
2024-07-30 05:16:06,735 batch 606/1011 - loss 2.15584467 - lr 0.0010 - time 255.83s
|
79 |
+
2024-07-30 05:16:49,402 batch 707/1011 - loss 2.15352337 - lr 0.0010 - time 298.49s
|
80 |
+
2024-07-30 05:17:29,519 batch 808/1011 - loss 2.15093230 - lr 0.0010 - time 338.61s
|
81 |
+
2024-07-30 05:18:11,078 batch 909/1011 - loss 2.14926926 - lr 0.0010 - time 380.17s
|
82 |
+
2024-07-30 05:18:53,181 batch 1010/1011 - loss 2.14489703 - lr 0.0010 - time 422.27s
|
83 |
+
2024-07-30 05:18:53,669 ----------------------------------------------------------------------------------------------------
|
84 |
+
2024-07-30 05:18:53,670 EPOCH 3 DONE
|
85 |
+
2024-07-30 05:19:15,925 TRAIN Loss: 2.1448
|
86 |
+
2024-07-30 05:19:15,925 DEV Loss: 3.3125
|
87 |
+
2024-07-30 05:19:15,925 DEV Perplexity: 27.4542
|
88 |
+
2024-07-30 05:19:15,925 No improvement for 2 epoch(s)
|
89 |
+
2024-07-30 05:19:15,925 ----------------------------------------------------------------------------------------------------
|
90 |
+
2024-07-30 05:19:15,925 EPOCH 4
|
91 |
+
2024-07-30 05:19:58,111 batch 101/1011 - loss 2.12072047 - lr 0.0010 - time 42.19s
|
92 |
+
2024-07-30 05:20:40,240 batch 202/1011 - loss 2.21416611 - lr 0.0010 - time 84.32s
|
93 |
+
2024-07-30 05:21:23,529 batch 303/1011 - loss 2.21167549 - lr 0.0010 - time 127.60s
|
94 |
+
2024-07-30 05:22:05,818 batch 404/1011 - loss 2.20075539 - lr 0.0010 - time 169.89s
|
95 |
+
2024-07-30 05:22:47,681 batch 505/1011 - loss 2.19065783 - lr 0.0010 - time 211.76s
|
96 |
+
2024-07-30 05:23:29,405 batch 606/1011 - loss 2.18455681 - lr 0.0010 - time 253.48s
|
97 |
+
2024-07-30 05:24:10,802 batch 707/1011 - loss 2.17808891 - lr 0.0010 - time 294.88s
|
98 |
+
2024-07-30 05:24:53,617 batch 808/1011 - loss 2.17207219 - lr 0.0010 - time 337.69s
|
99 |
+
2024-07-30 05:25:35,367 batch 909/1011 - loss 2.16698701 - lr 0.0010 - time 379.44s
|
100 |
+
2024-07-30 05:26:17,747 batch 1010/1011 - loss 2.16209597 - lr 0.0010 - time 421.82s
|
101 |
+
2024-07-30 05:26:18,326 ----------------------------------------------------------------------------------------------------
|
102 |
+
2024-07-30 05:26:18,327 EPOCH 4 DONE
|
103 |
+
2024-07-30 05:26:40,665 TRAIN Loss: 2.1622
|
104 |
+
2024-07-30 05:26:40,665 DEV Loss: 3.2818
|
105 |
+
2024-07-30 05:26:40,665 DEV Perplexity: 26.6225
|
106 |
+
2024-07-30 05:26:40,665 No improvement for 3 epoch(s)
|
107 |
+
2024-07-30 05:26:40,665 ----------------------------------------------------------------------------------------------------
|
108 |
+
2024-07-30 05:26:40,665 EPOCH 5
|
109 |
+
2024-07-30 05:27:22,961 batch 101/1011 - loss 2.10391398 - lr 0.0010 - time 42.30s
|
110 |
+
2024-07-30 05:28:05,465 batch 202/1011 - loss 2.09989159 - lr 0.0010 - time 84.80s
|
111 |
+
2024-07-30 05:28:46,754 batch 303/1011 - loss 2.10524238 - lr 0.0010 - time 126.09s
|
112 |
+
2024-07-30 05:29:28,860 batch 404/1011 - loss 2.10348708 - lr 0.0010 - time 168.19s
|
113 |
+
2024-07-30 05:30:11,004 batch 505/1011 - loss 2.10245773 - lr 0.0010 - time 210.34s
|
114 |
+
2024-07-30 05:30:54,362 batch 606/1011 - loss 2.10020018 - lr 0.0010 - time 253.70s
|
115 |
+
2024-07-30 05:31:36,542 batch 707/1011 - loss 2.10009074 - lr 0.0010 - time 295.88s
|
116 |
+
2024-07-30 05:32:17,742 batch 808/1011 - loss 2.09978649 - lr 0.0010 - time 337.08s
|
117 |
+
2024-07-30 05:32:59,360 batch 909/1011 - loss 2.10032565 - lr 0.0010 - time 378.69s
|
118 |
+
2024-07-30 05:33:41,143 batch 1010/1011 - loss 2.09873604 - lr 0.0010 - time 420.48s
|
119 |
+
2024-07-30 05:33:41,606 ----------------------------------------------------------------------------------------------------
|
120 |
+
2024-07-30 05:33:41,606 EPOCH 5 DONE
|
121 |
+
2024-07-30 05:34:03,756 TRAIN Loss: 2.0986
|
122 |
+
2024-07-30 05:34:03,756 DEV Loss: 3.2678
|
123 |
+
2024-07-30 05:34:03,756 DEV Perplexity: 26.2533
|
124 |
+
2024-07-30 05:34:03,756 No improvement for 4 epoch(s)
|
125 |
+
2024-07-30 05:34:03,756 ----------------------------------------------------------------------------------------------------
|
126 |
+
2024-07-30 05:34:03,756 EPOCH 6
|
127 |
+
2024-07-30 05:34:46,645 batch 101/1011 - loss 2.06396743 - lr 0.0001 - time 42.89s
|
128 |
+
2024-07-30 05:35:28,798 batch 202/1011 - loss 2.07453595 - lr 0.0001 - time 85.04s
|
129 |
+
2024-07-30 05:36:11,339 batch 303/1011 - loss 2.07824306 - lr 0.0001 - time 127.58s
|
130 |
+
2024-07-30 05:36:52,894 batch 404/1011 - loss 2.07652715 - lr 0.0001 - time 169.14s
|
131 |
+
2024-07-30 05:37:33,844 batch 505/1011 - loss 2.07604869 - lr 0.0001 - time 210.09s
|
132 |
+
2024-07-30 05:38:17,119 batch 606/1011 - loss 2.07629355 - lr 0.0001 - time 253.36s
|
133 |
+
2024-07-30 05:38:59,677 batch 707/1011 - loss 2.07612398 - lr 0.0001 - time 295.92s
|
134 |
+
2024-07-30 05:39:42,943 batch 808/1011 - loss 2.07533724 - lr 0.0001 - time 339.19s
|
135 |
+
2024-07-30 05:40:25,654 batch 909/1011 - loss 2.07488621 - lr 0.0001 - time 381.90s
|
136 |
+
2024-07-30 05:41:07,662 batch 1010/1011 - loss 2.07499973 - lr 0.0001 - time 423.91s
|
137 |
+
2024-07-30 05:41:08,105 ----------------------------------------------------------------------------------------------------
|
138 |
+
2024-07-30 05:41:08,106 EPOCH 6 DONE
|
139 |
+
2024-07-30 05:41:30,425 TRAIN Loss: 2.0752
|
140 |
+
2024-07-30 05:41:30,425 DEV Loss: 3.2455
|
141 |
+
2024-07-30 05:41:30,425 DEV Perplexity: 25.6752
|
142 |
+
2024-07-30 05:41:30,426 No improvement for 5 epoch(s)
|
143 |
+
2024-07-30 05:41:30,426 Patience reached: Terminating model training due to early stopping
|
144 |
+
2024-07-30 05:41:30,426 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-30 05:41:30,426 Finished Training
|
146 |
+
2024-07-30 05:42:15,845 TEST Perplexity: 23.7420
|
147 |
+
2024-07-30 05:52:16,110 TEST BLEU = 4.88 41.3/11.0/2.2/0.6 (BP = 1.000 ratio = 1.000 hyp_len = 92 ref_len = 92)
|
models/en2sv/character_end2end_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e755e2a44a40cbf58deab21d0411647e206610feeba42a7e73f73281ec988390
|
3 |
+
size 35760052
|
models/en2sv/word_end2end_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-30 05:52:44,411 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-30 05:52:44,411 Training Model
|
3 |
+
2024-07-30 05:52:44,411 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-30 05:52:44,411 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(13968, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(21119, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=21119, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-30 05:52:44,411 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-30 05:52:44,411 Training Hyperparameters:
|
27 |
+
2024-07-30 05:52:44,411 - max_epochs: 10
|
28 |
+
2024-07-30 05:52:44,411 - learning_rate: 0.001
|
29 |
+
2024-07-30 05:52:44,411 - batch_size: 128
|
30 |
+
2024-07-30 05:52:44,411 - patience: 5
|
31 |
+
2024-07-30 05:52:44,411 - scheduler_patience: 3
|
32 |
+
2024-07-30 05:52:44,411 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-30 05:52:44,411 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-30 05:52:44,411 Computational Parameters:
|
35 |
+
2024-07-30 05:52:44,411 - num_workers: 4
|
36 |
+
2024-07-30 05:52:44,412 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-30 05:52:44,412 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-30 05:52:44,412 Dataset Splits:
|
39 |
+
2024-07-30 05:52:44,412 - train: 129388 data points
|
40 |
+
2024-07-30 05:52:44,412 - dev: 18485 data points
|
41 |
+
2024-07-30 05:52:44,412 - test: 36969 data points
|
42 |
+
2024-07-30 05:52:44,412 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-30 05:52:44,412 EPOCH 1
|
44 |
+
2024-07-30 05:53:01,839 batch 101/1011 - loss 6.65023891 - lr 0.0010 - time 17.43s
|
45 |
+
2024-07-30 05:53:19,994 batch 202/1011 - loss 6.37670207 - lr 0.0010 - time 35.58s
|
46 |
+
2024-07-30 05:53:37,722 batch 303/1011 - loss 6.18777885 - lr 0.0010 - time 53.31s
|
47 |
+
2024-07-30 05:53:55,183 batch 404/1011 - loss 6.02016410 - lr 0.0010 - time 70.77s
|
48 |
+
2024-07-30 05:54:12,971 batch 505/1011 - loss 5.87491408 - lr 0.0010 - time 88.56s
|
49 |
+
2024-07-30 05:54:30,796 batch 606/1011 - loss 5.74571717 - lr 0.0010 - time 106.38s
|
50 |
+
2024-07-30 05:54:48,498 batch 707/1011 - loss 5.62989615 - lr 0.0010 - time 124.09s
|
51 |
+
2024-07-30 05:55:06,801 batch 808/1011 - loss 5.52785168 - lr 0.0010 - time 142.39s
|
52 |
+
2024-07-30 05:55:24,580 batch 909/1011 - loss 5.43488396 - lr 0.0010 - time 160.17s
|
53 |
+
2024-07-30 05:55:42,937 batch 1010/1011 - loss 5.35160901 - lr 0.0010 - time 178.53s
|
54 |
+
2024-07-30 05:55:43,149 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-30 05:55:43,150 EPOCH 1 DONE
|
56 |
+
2024-07-30 05:55:51,676 TRAIN Loss: 5.3508
|
57 |
+
2024-07-30 05:55:51,677 DEV Loss: 5.3985
|
58 |
+
2024-07-30 05:55:51,677 DEV Perplexity: 221.0851
|
59 |
+
2024-07-30 05:55:51,677 New best score!
|
60 |
+
2024-07-30 05:55:51,678 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-30 05:55:51,678 EPOCH 2
|
62 |
+
2024-07-30 05:56:10,138 batch 101/1011 - loss 4.35387788 - lr 0.0010 - time 18.46s
|
63 |
+
2024-07-30 05:56:27,943 batch 202/1011 - loss 4.31176025 - lr 0.0010 - time 36.26s
|
64 |
+
2024-07-30 05:56:45,329 batch 303/1011 - loss 4.26554860 - lr 0.0010 - time 53.65s
|
65 |
+
2024-07-30 05:57:03,548 batch 404/1011 - loss 4.22823117 - lr 0.0010 - time 71.87s
|
66 |
+
2024-07-30 05:57:21,407 batch 505/1011 - loss 4.19638747 - lr 0.0010 - time 89.73s
|
67 |
+
2024-07-30 05:57:40,047 batch 606/1011 - loss 4.16619647 - lr 0.0010 - time 108.37s
|
68 |
+
2024-07-30 05:57:58,263 batch 707/1011 - loss 4.13296555 - lr 0.0010 - time 126.59s
|
69 |
+
2024-07-30 05:58:16,175 batch 808/1011 - loss 4.10798012 - lr 0.0010 - time 144.50s
|
70 |
+
2024-07-30 05:58:34,009 batch 909/1011 - loss 4.08410400 - lr 0.0010 - time 162.33s
|
71 |
+
2024-07-30 05:58:51,761 batch 1010/1011 - loss 4.06205331 - lr 0.0010 - time 180.08s
|
72 |
+
2024-07-30 05:58:51,991 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-30 05:58:51,992 EPOCH 2 DONE
|
74 |
+
2024-07-30 05:59:00,286 TRAIN Loss: 4.0617
|
75 |
+
2024-07-30 05:59:00,286 DEV Loss: 5.3025
|
76 |
+
2024-07-30 05:59:00,286 DEV Perplexity: 200.8374
|
77 |
+
2024-07-30 05:59:00,286 New best score!
|
78 |
+
2024-07-30 05:59:00,287 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-30 05:59:00,287 EPOCH 3
|
80 |
+
2024-07-30 05:59:18,575 batch 101/1011 - loss 3.63924071 - lr 0.0010 - time 18.29s
|
81 |
+
2024-07-30 05:59:35,942 batch 202/1011 - loss 3.64374635 - lr 0.0010 - time 35.65s
|
82 |
+
2024-07-30 05:59:53,930 batch 303/1011 - loss 3.62353413 - lr 0.0010 - time 53.64s
|
83 |
+
2024-07-30 06:00:12,653 batch 404/1011 - loss 3.60985195 - lr 0.0010 - time 72.37s
|
84 |
+
2024-07-30 06:00:31,232 batch 505/1011 - loss 3.60273010 - lr 0.0010 - time 90.95s
|
85 |
+
2024-07-30 06:00:49,167 batch 606/1011 - loss 3.59089866 - lr 0.0010 - time 108.88s
|
86 |
+
2024-07-30 06:01:07,550 batch 707/1011 - loss 3.58091725 - lr 0.0010 - time 127.26s
|
87 |
+
2024-07-30 06:01:25,805 batch 808/1011 - loss 3.56992363 - lr 0.0010 - time 145.52s
|
88 |
+
2024-07-30 06:01:43,461 batch 909/1011 - loss 3.56065750 - lr 0.0010 - time 163.17s
|
89 |
+
2024-07-30 06:02:01,601 batch 1010/1011 - loss 3.55507214 - lr 0.0010 - time 181.31s
|
90 |
+
2024-07-30 06:02:01,845 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-30 06:02:01,845 EPOCH 3 DONE
|
92 |
+
2024-07-30 06:02:10,227 TRAIN Loss: 3.5548
|
93 |
+
2024-07-30 06:02:10,227 DEV Loss: 5.3260
|
94 |
+
2024-07-30 06:02:10,227 DEV Perplexity: 205.6116
|
95 |
+
2024-07-30 06:02:10,227 No improvement for 1 epoch(s)
|
96 |
+
2024-07-30 06:02:10,227 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-30 06:02:10,227 EPOCH 4
|
98 |
+
2024-07-30 06:02:28,830 batch 101/1011 - loss 3.28445469 - lr 0.0010 - time 18.60s
|
99 |
+
2024-07-30 06:02:46,477 batch 202/1011 - loss 3.28501337 - lr 0.0010 - time 36.25s
|
100 |
+
2024-07-30 06:03:04,557 batch 303/1011 - loss 3.28228449 - lr 0.0010 - time 54.33s
|
101 |
+
2024-07-30 06:03:22,554 batch 404/1011 - loss 3.28625823 - lr 0.0010 - time 72.33s
|
102 |
+
2024-07-30 06:03:40,976 batch 505/1011 - loss 3.28577206 - lr 0.0010 - time 90.75s
|
103 |
+
2024-07-30 06:03:59,242 batch 606/1011 - loss 3.27718717 - lr 0.0010 - time 109.02s
|
104 |
+
2024-07-30 06:04:17,149 batch 707/1011 - loss 3.27785742 - lr 0.0010 - time 126.92s
|
105 |
+
2024-07-30 06:04:35,362 batch 808/1011 - loss 3.27383622 - lr 0.0010 - time 145.13s
|
106 |
+
2024-07-30 06:04:53,460 batch 909/1011 - loss 3.27473364 - lr 0.0010 - time 163.23s
|
107 |
+
2024-07-30 06:05:11,650 batch 1010/1011 - loss 3.26900207 - lr 0.0010 - time 181.42s
|
108 |
+
2024-07-30 06:05:11,891 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-30 06:05:11,892 EPOCH 4 DONE
|
110 |
+
2024-07-30 06:05:20,185 TRAIN Loss: 3.2689
|
111 |
+
2024-07-30 06:05:20,185 DEV Loss: 5.0322
|
112 |
+
2024-07-30 06:05:20,185 DEV Perplexity: 153.2689
|
113 |
+
2024-07-30 06:05:20,185 New best score!
|
114 |
+
2024-07-30 06:05:20,186 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-30 06:05:20,186 EPOCH 5
|
116 |
+
2024-07-30 06:05:38,378 batch 101/1011 - loss 3.04620511 - lr 0.0010 - time 18.19s
|
117 |
+
2024-07-30 06:05:56,066 batch 202/1011 - loss 3.05621696 - lr 0.0010 - time 35.88s
|
118 |
+
2024-07-30 06:06:14,397 batch 303/1011 - loss 3.07215565 - lr 0.0010 - time 54.21s
|
119 |
+
2024-07-30 06:06:32,274 batch 404/1011 - loss 3.07633480 - lr 0.0010 - time 72.09s
|
120 |
+
2024-07-30 06:06:50,327 batch 505/1011 - loss 3.08537655 - lr 0.0010 - time 90.14s
|
121 |
+
2024-07-30 06:07:08,595 batch 606/1011 - loss 3.08699256 - lr 0.0010 - time 108.41s
|
122 |
+
2024-07-30 06:07:26,268 batch 707/1011 - loss 3.09115884 - lr 0.0010 - time 126.08s
|
123 |
+
2024-07-30 06:07:43,949 batch 808/1011 - loss 3.09043722 - lr 0.0010 - time 143.76s
|
124 |
+
2024-07-30 06:08:02,577 batch 909/1011 - loss 3.09457381 - lr 0.0010 - time 162.39s
|
125 |
+
2024-07-30 06:08:20,523 batch 1010/1011 - loss 3.09344615 - lr 0.0010 - time 180.34s
|
126 |
+
2024-07-30 06:08:20,849 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-30 06:08:20,850 EPOCH 5 DONE
|
128 |
+
2024-07-30 06:08:29,338 TRAIN Loss: 3.0932
|
129 |
+
2024-07-30 06:08:29,339 DEV Loss: 5.2149
|
130 |
+
2024-07-30 06:08:29,339 DEV Perplexity: 183.9882
|
131 |
+
2024-07-30 06:08:29,339 No improvement for 1 epoch(s)
|
132 |
+
2024-07-30 06:08:29,339 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-30 06:08:29,339 EPOCH 6
|
134 |
+
2024-07-30 06:08:48,148 batch 101/1011 - loss 2.95294287 - lr 0.0010 - time 18.81s
|
135 |
+
2024-07-30 06:09:05,811 batch 202/1011 - loss 2.94095918 - lr 0.0010 - time 36.47s
|
136 |
+
2024-07-30 06:09:24,029 batch 303/1011 - loss 2.92945394 - lr 0.0010 - time 54.69s
|
137 |
+
2024-07-30 06:09:41,027 batch 404/1011 - loss 2.93493413 - lr 0.0010 - time 71.69s
|
138 |
+
2024-07-30 06:09:59,141 batch 505/1011 - loss 2.93981232 - lr 0.0010 - time 89.80s
|
139 |
+
2024-07-30 06:10:17,688 batch 606/1011 - loss 2.94975308 - lr 0.0010 - time 108.35s
|
140 |
+
2024-07-30 06:10:35,813 batch 707/1011 - loss 2.95301807 - lr 0.0010 - time 126.47s
|
141 |
+
2024-07-30 06:10:53,277 batch 808/1011 - loss 2.95479195 - lr 0.0010 - time 143.94s
|
142 |
+
2024-07-30 06:11:11,427 batch 909/1011 - loss 2.95849915 - lr 0.0010 - time 162.09s
|
143 |
+
2024-07-30 06:11:29,521 batch 1010/1011 - loss 2.96108001 - lr 0.0010 - time 180.18s
|
144 |
+
2024-07-30 06:11:29,783 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-30 06:11:29,784 EPOCH 6 DONE
|
146 |
+
2024-07-30 06:11:38,034 TRAIN Loss: 2.9611
|
147 |
+
2024-07-30 06:11:38,034 DEV Loss: 5.2295
|
148 |
+
2024-07-30 06:11:38,034 DEV Perplexity: 186.7041
|
149 |
+
2024-07-30 06:11:38,034 No improvement for 2 epoch(s)
|
150 |
+
2024-07-30 06:11:38,034 ----------------------------------------------------------------------------------------------------
|
151 |
+
2024-07-30 06:11:38,034 EPOCH 7
|
152 |
+
2024-07-30 06:11:56,963 batch 101/1011 - loss 2.83695398 - lr 0.0010 - time 18.93s
|
153 |
+
2024-07-30 06:12:14,852 batch 202/1011 - loss 2.82333080 - lr 0.0010 - time 36.82s
|
154 |
+
2024-07-30 06:12:33,156 batch 303/1011 - loss 2.82609393 - lr 0.0010 - time 55.12s
|
155 |
+
2024-07-30 06:12:50,576 batch 404/1011 - loss 2.83502390 - lr 0.0010 - time 72.54s
|
156 |
+
2024-07-30 06:13:08,639 batch 505/1011 - loss 2.84160646 - lr 0.0010 - time 90.60s
|
157 |
+
2024-07-30 06:13:26,933 batch 606/1011 - loss 2.84488981 - lr 0.0010 - time 108.90s
|
158 |
+
2024-07-30 06:13:45,357 batch 707/1011 - loss 2.84374654 - lr 0.0010 - time 127.32s
|
159 |
+
2024-07-30 06:14:03,581 batch 808/1011 - loss 2.84697443 - lr 0.0010 - time 145.55s
|
160 |
+
2024-07-30 06:14:21,037 batch 909/1011 - loss 2.84805540 - lr 0.0010 - time 163.00s
|
161 |
+
2024-07-30 06:14:39,192 batch 1010/1011 - loss 2.85068582 - lr 0.0010 - time 181.16s
|
162 |
+
2024-07-30 06:14:39,414 ----------------------------------------------------------------------------------------------------
|
163 |
+
2024-07-30 06:14:39,415 EPOCH 7 DONE
|
164 |
+
2024-07-30 06:14:47,851 TRAIN Loss: 2.8508
|
165 |
+
2024-07-30 06:14:47,851 DEV Loss: 5.2494
|
166 |
+
2024-07-30 06:14:47,851 DEV Perplexity: 190.4522
|
167 |
+
2024-07-30 06:14:47,851 No improvement for 3 epoch(s)
|
168 |
+
2024-07-30 06:14:47,851 ----------------------------------------------------------------------------------------------------
|
169 |
+
2024-07-30 06:14:47,851 EPOCH 8
|
170 |
+
2024-07-30 06:15:06,292 batch 101/1011 - loss 2.68063131 - lr 0.0010 - time 18.44s
|
171 |
+
2024-07-30 06:15:24,465 batch 202/1011 - loss 2.70113396 - lr 0.0010 - time 36.61s
|
172 |
+
2024-07-30 06:15:43,138 batch 303/1011 - loss 2.71602054 - lr 0.0010 - time 55.29s
|
173 |
+
2024-07-30 06:16:01,098 batch 404/1011 - loss 2.73064416 - lr 0.0010 - time 73.25s
|
174 |
+
2024-07-30 06:16:19,362 batch 505/1011 - loss 2.73879396 - lr 0.0010 - time 91.51s
|
175 |
+
2024-07-30 06:16:37,807 batch 606/1011 - loss 2.74986825 - lr 0.0010 - time 109.96s
|
176 |
+
2024-07-30 06:16:55,815 batch 707/1011 - loss 2.75368593 - lr 0.0010 - time 127.96s
|
177 |
+
2024-07-30 06:17:13,486 batch 808/1011 - loss 2.75654979 - lr 0.0010 - time 145.64s
|
178 |
+
2024-07-30 06:17:31,466 batch 909/1011 - loss 2.76380862 - lr 0.0010 - time 163.61s
|
179 |
+
2024-07-30 06:17:49,300 batch 1010/1011 - loss 2.76961123 - lr 0.0010 - time 181.45s
|
180 |
+
2024-07-30 06:17:49,575 ----------------------------------------------------------------------------------------------------
|
181 |
+
2024-07-30 06:17:49,576 EPOCH 8 DONE
|
182 |
+
2024-07-30 06:17:57,990 TRAIN Loss: 2.7696
|
183 |
+
2024-07-30 06:17:57,990 DEV Loss: 5.0718
|
184 |
+
2024-07-30 06:17:57,990 DEV Perplexity: 159.4646
|
185 |
+
2024-07-30 06:17:57,991 No improvement for 4 epoch(s)
|
186 |
+
2024-07-30 06:17:57,991 ----------------------------------------------------------------------------------------------------
|
187 |
+
2024-07-30 06:17:57,991 EPOCH 9
|
188 |
+
2024-07-30 06:18:16,347 batch 101/1011 - loss 2.62930829 - lr 0.0001 - time 18.36s
|
189 |
+
2024-07-30 06:18:34,086 batch 202/1011 - loss 2.60304460 - lr 0.0001 - time 36.10s
|
190 |
+
2024-07-30 06:18:51,978 batch 303/1011 - loss 2.59367224 - lr 0.0001 - time 53.99s
|
191 |
+
2024-07-30 06:19:10,316 batch 404/1011 - loss 2.57648826 - lr 0.0001 - time 72.33s
|
192 |
+
2024-07-30 06:19:27,848 batch 505/1011 - loss 2.56714471 - lr 0.0001 - time 89.86s
|
193 |
+
2024-07-30 06:19:46,269 batch 606/1011 - loss 2.56959724 - lr 0.0001 - time 108.28s
|
194 |
+
2024-07-30 06:20:04,011 batch 707/1011 - loss 2.56410600 - lr 0.0001 - time 126.02s
|
195 |
+
2024-07-30 06:20:22,334 batch 808/1011 - loss 2.56042437 - lr 0.0001 - time 144.34s
|
196 |
+
2024-07-30 06:20:40,444 batch 909/1011 - loss 2.55821582 - lr 0.0001 - time 162.45s
|
197 |
+
2024-07-30 06:20:58,271 batch 1010/1011 - loss 2.55264133 - lr 0.0001 - time 180.28s
|
198 |
+
2024-07-30 06:20:58,552 ----------------------------------------------------------------------------------------------------
|
199 |
+
2024-07-30 06:20:58,553 EPOCH 9 DONE
|
200 |
+
2024-07-30 06:21:07,076 TRAIN Loss: 2.5523
|
201 |
+
2024-07-30 06:21:07,077 DEV Loss: 5.2075
|
202 |
+
2024-07-30 06:21:07,077 DEV Perplexity: 182.6331
|
203 |
+
2024-07-30 06:21:07,077 No improvement for 5 epoch(s)
|
204 |
+
2024-07-30 06:21:07,077 Patience reached: Terminating model training due to early stopping
|
205 |
+
2024-07-30 06:21:07,077 ----------------------------------------------------------------------------------------------------
|
206 |
+
2024-07-30 06:21:07,077 Finished Training
|
207 |
+
2024-07-30 06:21:23,786 TEST Perplexity: 155.6613
|
208 |
+
2024-07-30 06:28:49,143 TEST BLEU = 20.09 95.7/64.4/11.4/2.3 (BP = 1.000 ratio = 1.000 hyp_len = 46 ref_len = 46)
|
models/en2sv/word_end2end_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0138df8589c4e503f585ea20a5606d53198067b20da170f01bc23901edb4828
|
3 |
+
size 101929576
|
models/en2sv/word_end2end_embeddings_without_attention/log.txt
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-30 06:29:18,150 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-30 06:29:18,150 Training Model
|
3 |
+
2024-07-30 06:29:18,150 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-30 06:29:18,150 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(13968, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(21119, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 1024, batch_first=True)
|
14 |
+
(hidden2vocab): Linear(in_features=1024, out_features=21119, bias=True)
|
15 |
+
(log_softmax): LogSoftmax(dim=-1)
|
16 |
+
)
|
17 |
+
)
|
18 |
+
2024-07-30 06:29:18,150 ----------------------------------------------------------------------------------------------------
|
19 |
+
2024-07-30 06:29:18,150 Training Hyperparameters:
|
20 |
+
2024-07-30 06:29:18,150 - max_epochs: 10
|
21 |
+
2024-07-30 06:29:18,150 - learning_rate: 0.001
|
22 |
+
2024-07-30 06:29:18,150 - batch_size: 128
|
23 |
+
2024-07-30 06:29:18,150 - patience: 5
|
24 |
+
2024-07-30 06:29:18,150 - scheduler_patience: 3
|
25 |
+
2024-07-30 06:29:18,150 - teacher_forcing_ratio: 0.5
|
26 |
+
2024-07-30 06:29:18,150 ----------------------------------------------------------------------------------------------------
|
27 |
+
2024-07-30 06:29:18,150 Computational Parameters:
|
28 |
+
2024-07-30 06:29:18,150 - num_workers: 4
|
29 |
+
2024-07-30 06:29:18,150 - device: device(type='cuda', index=0)
|
30 |
+
2024-07-30 06:29:18,150 ----------------------------------------------------------------------------------------------------
|
31 |
+
2024-07-30 06:29:18,150 Dataset Splits:
|
32 |
+
2024-07-30 06:29:18,150 - train: 129388 data points
|
33 |
+
2024-07-30 06:29:18,150 - dev: 18485 data points
|
34 |
+
2024-07-30 06:29:18,150 - test: 36969 data points
|
35 |
+
2024-07-30 06:29:18,150 ----------------------------------------------------------------------------------------------------
|
36 |
+
2024-07-30 06:29:18,151 EPOCH 1
|
37 |
+
2024-07-30 06:29:38,858 batch 101/1011 - loss 6.54900506 - lr 0.0010 - time 20.71s
|
38 |
+
2024-07-30 06:30:00,285 batch 202/1011 - loss 6.26388977 - lr 0.0010 - time 42.13s
|
39 |
+
2024-07-30 06:30:21,175 batch 303/1011 - loss 6.09678941 - lr 0.0010 - time 63.02s
|
40 |
+
2024-07-30 06:30:42,015 batch 404/1011 - loss 5.97068659 - lr 0.0010 - time 83.86s
|
41 |
+
2024-07-30 06:31:03,964 batch 505/1011 - loss 5.87083804 - lr 0.0010 - time 105.81s
|
42 |
+
2024-07-30 06:31:24,546 batch 606/1011 - loss 5.78565772 - lr 0.0010 - time 126.40s
|
43 |
+
2024-07-30 06:31:45,551 batch 707/1011 - loss 5.71605991 - lr 0.0010 - time 147.40s
|
44 |
+
2024-07-30 06:32:06,407 batch 808/1011 - loss 5.65229468 - lr 0.0010 - time 168.26s
|
45 |
+
2024-07-30 06:32:27,060 batch 909/1011 - loss 5.59811051 - lr 0.0010 - time 188.91s
|
46 |
+
2024-07-30 06:32:48,176 batch 1010/1011 - loss 5.54892200 - lr 0.0010 - time 210.03s
|
47 |
+
2024-07-30 06:32:48,504 ----------------------------------------------------------------------------------------------------
|
48 |
+
2024-07-30 06:32:48,504 EPOCH 1 DONE
|
49 |
+
2024-07-30 06:32:59,194 TRAIN Loss: 5.5488
|
50 |
+
2024-07-30 06:32:59,195 DEV Loss: 5.6299
|
51 |
+
2024-07-30 06:32:59,195 DEV Perplexity: 278.6326
|
52 |
+
2024-07-30 06:32:59,195 New best score!
|
53 |
+
2024-07-30 06:32:59,196 ----------------------------------------------------------------------------------------------------
|
54 |
+
2024-07-30 06:32:59,196 EPOCH 2
|
55 |
+
2024-07-30 06:33:20,681 batch 101/1011 - loss 4.94433821 - lr 0.0010 - time 21.48s
|
56 |
+
2024-07-30 06:33:41,401 batch 202/1011 - loss 4.92562360 - lr 0.0010 - time 42.21s
|
57 |
+
2024-07-30 06:34:02,529 batch 303/1011 - loss 4.90439558 - lr 0.0010 - time 63.33s
|
58 |
+
2024-07-30 06:34:23,187 batch 404/1011 - loss 4.88209260 - lr 0.0010 - time 83.99s
|
59 |
+
2024-07-30 06:34:44,258 batch 505/1011 - loss 4.86276889 - lr 0.0010 - time 105.06s
|
60 |
+
2024-07-30 06:35:05,629 batch 606/1011 - loss 4.84399123 - lr 0.0010 - time 126.43s
|
61 |
+
2024-07-30 06:35:27,453 batch 707/1011 - loss 4.82627641 - lr 0.0010 - time 148.26s
|
62 |
+
2024-07-30 06:35:48,468 batch 808/1011 - loss 4.81157339 - lr 0.0010 - time 169.27s
|
63 |
+
2024-07-30 06:36:09,648 batch 909/1011 - loss 4.79432231 - lr 0.0010 - time 190.45s
|
64 |
+
2024-07-30 06:36:30,076 batch 1010/1011 - loss 4.77798532 - lr 0.0010 - time 210.88s
|
65 |
+
2024-07-30 06:36:30,348 ----------------------------------------------------------------------------------------------------
|
66 |
+
2024-07-30 06:36:30,349 EPOCH 2 DONE
|
67 |
+
2024-07-30 06:36:40,802 TRAIN Loss: 4.7778
|
68 |
+
2024-07-30 06:36:40,802 DEV Loss: 5.3981
|
69 |
+
2024-07-30 06:36:40,803 DEV Perplexity: 220.9858
|
70 |
+
2024-07-30 06:36:40,803 New best score!
|
71 |
+
2024-07-30 06:36:40,803 ----------------------------------------------------------------------------------------------------
|
72 |
+
2024-07-30 06:36:40,803 EPOCH 3
|
73 |
+
2024-07-30 06:37:02,097 batch 101/1011 - loss 4.45201429 - lr 0.0010 - time 21.29s
|
74 |
+
2024-07-30 06:37:23,306 batch 202/1011 - loss 4.45439901 - lr 0.0010 - time 42.50s
|
75 |
+
2024-07-30 06:37:44,874 batch 303/1011 - loss 4.44706424 - lr 0.0010 - time 64.07s
|
76 |
+
2024-07-30 06:38:05,446 batch 404/1011 - loss 4.44014408 - lr 0.0010 - time 84.64s
|
77 |
+
2024-07-30 06:38:25,941 batch 505/1011 - loss 4.43406733 - lr 0.0010 - time 105.14s
|
78 |
+
2024-07-30 06:38:46,557 batch 606/1011 - loss 4.42863054 - lr 0.0010 - time 125.75s
|
79 |
+
2024-07-30 06:39:07,885 batch 707/1011 - loss 4.42343303 - lr 0.0010 - time 147.08s
|
80 |
+
2024-07-30 06:39:29,228 batch 808/1011 - loss 4.41553006 - lr 0.0010 - time 168.42s
|
81 |
+
2024-07-30 06:39:51,098 batch 909/1011 - loss 4.40918738 - lr 0.0010 - time 190.29s
|
82 |
+
2024-07-30 06:40:11,715 batch 1010/1011 - loss 4.40014238 - lr 0.0010 - time 210.91s
|
83 |
+
2024-07-30 06:40:12,004 ----------------------------------------------------------------------------------------------------
|
84 |
+
2024-07-30 06:40:12,004 EPOCH 3 DONE
|
85 |
+
2024-07-30 06:40:22,560 TRAIN Loss: 4.3999
|
86 |
+
2024-07-30 06:40:22,560 DEV Loss: 5.2924
|
87 |
+
2024-07-30 06:40:22,560 DEV Perplexity: 198.8155
|
88 |
+
2024-07-30 06:40:22,560 New best score!
|
89 |
+
2024-07-30 06:40:22,561 ----------------------------------------------------------------------------------------------------
|
90 |
+
2024-07-30 06:40:22,561 EPOCH 4
|
91 |
+
2024-07-30 06:40:43,916 batch 101/1011 - loss 4.10502068 - lr 0.0010 - time 21.36s
|
92 |
+
2024-07-30 06:41:05,786 batch 202/1011 - loss 4.11935668 - lr 0.0010 - time 43.23s
|
93 |
+
2024-07-30 06:41:26,914 batch 303/1011 - loss 4.12561246 - lr 0.0010 - time 64.35s
|
94 |
+
2024-07-30 06:41:48,295 batch 404/1011 - loss 4.13155013 - lr 0.0010 - time 85.73s
|
95 |
+
2024-07-30 06:42:09,279 batch 505/1011 - loss 4.12985176 - lr 0.0010 - time 106.72s
|
96 |
+
2024-07-30 06:42:30,673 batch 606/1011 - loss 4.12696999 - lr 0.0010 - time 128.11s
|
97 |
+
2024-07-30 06:42:51,819 batch 707/1011 - loss 4.12794560 - lr 0.0010 - time 149.26s
|
98 |
+
2024-07-30 06:43:13,570 batch 808/1011 - loss 4.12481921 - lr 0.0010 - time 171.01s
|
99 |
+
2024-07-30 06:43:34,748 batch 909/1011 - loss 4.12359074 - lr 0.0010 - time 192.19s
|
100 |
+
2024-07-30 06:43:55,887 batch 1010/1011 - loss 4.12494421 - lr 0.0010 - time 213.33s
|
101 |
+
2024-07-30 06:43:56,162 ----------------------------------------------------------------------------------------------------
|
102 |
+
2024-07-30 06:43:56,162 EPOCH 4 DONE
|
103 |
+
2024-07-30 06:44:06,857 TRAIN Loss: 4.1250
|
104 |
+
2024-07-30 06:44:06,857 DEV Loss: 5.2752
|
105 |
+
2024-07-30 06:44:06,857 DEV Perplexity: 195.4273
|
106 |
+
2024-07-30 06:44:06,857 New best score!
|
107 |
+
2024-07-30 06:44:06,858 ----------------------------------------------------------------------------------------------------
|
108 |
+
2024-07-30 06:44:06,858 EPOCH 5
|
109 |
+
2024-07-30 06:44:28,273 batch 101/1011 - loss 3.85123608 - lr 0.0010 - time 21.41s
|
110 |
+
2024-07-30 06:44:49,120 batch 202/1011 - loss 3.86484195 - lr 0.0010 - time 42.26s
|
111 |
+
2024-07-30 06:45:09,888 batch 303/1011 - loss 3.87152359 - lr 0.0010 - time 63.03s
|
112 |
+
2024-07-30 06:45:31,194 batch 404/1011 - loss 3.88143508 - lr 0.0010 - time 84.34s
|
113 |
+
2024-07-30 06:45:52,914 batch 505/1011 - loss 3.89105263 - lr 0.0010 - time 106.06s
|
114 |
+
2024-07-30 06:46:13,635 batch 606/1011 - loss 3.89264883 - lr 0.0010 - time 126.78s
|
115 |
+
2024-07-30 06:46:34,967 batch 707/1011 - loss 3.89493803 - lr 0.0010 - time 148.11s
|
116 |
+
2024-07-30 06:46:56,029 batch 808/1011 - loss 3.89701127 - lr 0.0010 - time 169.17s
|
117 |
+
2024-07-30 06:47:16,775 batch 909/1011 - loss 3.89973897 - lr 0.0010 - time 189.92s
|
118 |
+
2024-07-30 06:47:37,318 batch 1010/1011 - loss 3.90153952 - lr 0.0010 - time 210.46s
|
119 |
+
2024-07-30 06:47:37,605 ----------------------------------------------------------------------------------------------------
|
120 |
+
2024-07-30 06:47:37,605 EPOCH 5 DONE
|
121 |
+
2024-07-30 06:47:48,364 TRAIN Loss: 3.9015
|
122 |
+
2024-07-30 06:47:48,364 DEV Loss: 5.2348
|
123 |
+
2024-07-30 06:47:48,364 DEV Perplexity: 187.6933
|
124 |
+
2024-07-30 06:47:48,364 New best score!
|
125 |
+
2024-07-30 06:47:48,365 ----------------------------------------------------------------------------------------------------
|
126 |
+
2024-07-30 06:47:48,365 EPOCH 6
|
127 |
+
2024-07-30 06:48:09,564 batch 101/1011 - loss 3.67885203 - lr 0.0010 - time 21.20s
|
128 |
+
2024-07-30 06:48:30,392 batch 202/1011 - loss 3.67257118 - lr 0.0010 - time 42.03s
|
129 |
+
2024-07-30 06:48:51,339 batch 303/1011 - loss 3.67440230 - lr 0.0010 - time 62.97s
|
130 |
+
2024-07-30 06:49:12,496 batch 404/1011 - loss 3.68773369 - lr 0.0010 - time 84.13s
|
131 |
+
2024-07-30 06:49:33,845 batch 505/1011 - loss 3.69679483 - lr 0.0010 - time 105.48s
|
132 |
+
2024-07-30 06:49:54,576 batch 606/1011 - loss 3.70562146 - lr 0.0010 - time 126.21s
|
133 |
+
2024-07-30 06:50:14,894 batch 707/1011 - loss 3.70857451 - lr 0.0010 - time 146.53s
|
134 |
+
2024-07-30 06:50:36,246 batch 808/1011 - loss 3.70958607 - lr 0.0010 - time 167.88s
|
135 |
+
2024-07-30 06:50:57,289 batch 909/1011 - loss 3.71686371 - lr 0.0010 - time 188.92s
|
136 |
+
2024-07-30 06:51:18,946 batch 1010/1011 - loss 3.72122688 - lr 0.0010 - time 210.58s
|
137 |
+
2024-07-30 06:51:19,225 ----------------------------------------------------------------------------------------------------
|
138 |
+
2024-07-30 06:51:19,225 EPOCH 6 DONE
|
139 |
+
2024-07-30 06:51:29,836 TRAIN Loss: 3.7215
|
140 |
+
2024-07-30 06:51:29,837 DEV Loss: 5.1690
|
141 |
+
2024-07-30 06:51:29,837 DEV Perplexity: 175.7387
|
142 |
+
2024-07-30 06:51:29,837 New best score!
|
143 |
+
2024-07-30 06:51:29,837 ----------------------------------------------------------------------------------------------------
|
144 |
+
2024-07-30 06:51:29,837 EPOCH 7
|
145 |
+
2024-07-30 06:51:51,044 batch 101/1011 - loss 3.50269853 - lr 0.0010 - time 21.21s
|
146 |
+
2024-07-30 06:52:11,911 batch 202/1011 - loss 3.51418849 - lr 0.0010 - time 42.07s
|
147 |
+
2024-07-30 06:52:32,926 batch 303/1011 - loss 3.52463826 - lr 0.0010 - time 63.09s
|
148 |
+
2024-07-30 06:52:53,845 batch 404/1011 - loss 3.53294488 - lr 0.0010 - time 84.01s
|
149 |
+
2024-07-30 06:53:15,266 batch 505/1011 - loss 3.54319899 - lr 0.0010 - time 105.43s
|
150 |
+
2024-07-30 06:53:36,682 batch 606/1011 - loss 3.55099323 - lr 0.0010 - time 126.84s
|
151 |
+
2024-07-30 06:53:57,551 batch 707/1011 - loss 3.55656118 - lr 0.0010 - time 147.71s
|
152 |
+
2024-07-30 06:54:19,003 batch 808/1011 - loss 3.56330248 - lr 0.0010 - time 169.17s
|
153 |
+
2024-07-30 06:54:40,032 batch 909/1011 - loss 3.56810741 - lr 0.0010 - time 190.19s
|
154 |
+
2024-07-30 06:55:01,300 batch 1010/1011 - loss 3.57099324 - lr 0.0010 - time 211.46s
|
155 |
+
2024-07-30 06:55:01,585 ----------------------------------------------------------------------------------------------------
|
156 |
+
2024-07-30 06:55:01,585 EPOCH 7 DONE
|
157 |
+
2024-07-30 06:55:12,312 TRAIN Loss: 3.5710
|
158 |
+
2024-07-30 06:55:12,312 DEV Loss: 5.2825
|
159 |
+
2024-07-30 06:55:12,312 DEV Perplexity: 196.8538
|
160 |
+
2024-07-30 06:55:12,312 No improvement for 1 epoch(s)
|
161 |
+
2024-07-30 06:55:12,312 ----------------------------------------------------------------------------------------------------
|
162 |
+
2024-07-30 06:55:12,312 EPOCH 8
|
163 |
+
2024-07-30 06:55:33,685 batch 101/1011 - loss 3.34728996 - lr 0.0010 - time 21.37s
|
164 |
+
2024-07-30 06:55:54,750 batch 202/1011 - loss 3.35408541 - lr 0.0010 - time 42.44s
|
165 |
+
2024-07-30 06:56:15,549 batch 303/1011 - loss 3.37382807 - lr 0.0010 - time 63.24s
|
166 |
+
2024-07-30 06:56:37,109 batch 404/1011 - loss 3.39155905 - lr 0.0010 - time 84.80s
|
167 |
+
2024-07-30 06:56:58,283 batch 505/1011 - loss 3.39773268 - lr 0.0010 - time 105.97s
|
168 |
+
2024-07-30 06:57:20,022 batch 606/1011 - loss 3.40938065 - lr 0.0010 - time 127.71s
|
169 |
+
2024-07-30 06:57:41,313 batch 707/1011 - loss 3.41756647 - lr 0.0010 - time 149.00s
|
170 |
+
2024-07-30 06:58:02,753 batch 808/1011 - loss 3.42700205 - lr 0.0010 - time 170.44s
|
171 |
+
2024-07-30 06:58:23,969 batch 909/1011 - loss 3.43592657 - lr 0.0010 - time 191.66s
|
172 |
+
2024-07-30 06:58:45,316 batch 1010/1011 - loss 3.44247971 - lr 0.0010 - time 213.00s
|
173 |
+
2024-07-30 06:58:45,604 ----------------------------------------------------------------------------------------------------
|
174 |
+
2024-07-30 06:58:45,604 EPOCH 8 DONE
|
175 |
+
2024-07-30 06:58:56,301 TRAIN Loss: 3.4425
|
176 |
+
2024-07-30 06:58:56,302 DEV Loss: 5.2746
|
177 |
+
2024-07-30 06:58:56,302 DEV Perplexity: 195.3166
|
178 |
+
2024-07-30 06:58:56,302 No improvement for 2 epoch(s)
|
179 |
+
2024-07-30 06:58:56,302 ----------------------------------------------------------------------------------------------------
|
180 |
+
2024-07-30 06:58:56,302 EPOCH 9
|
181 |
+
2024-07-30 06:59:17,783 batch 101/1011 - loss 3.23941568 - lr 0.0010 - time 21.48s
|
182 |
+
2024-07-30 06:59:39,064 batch 202/1011 - loss 3.24939437 - lr 0.0010 - time 42.76s
|
183 |
+
2024-07-30 06:59:59,954 batch 303/1011 - loss 3.26940986 - lr 0.0010 - time 63.65s
|
184 |
+
2024-07-30 07:00:20,959 batch 404/1011 - loss 3.29002602 - lr 0.0010 - time 84.66s
|
185 |
+
2024-07-30 07:00:41,611 batch 505/1011 - loss 3.29511334 - lr 0.0010 - time 105.31s
|
186 |
+
2024-07-30 07:01:02,853 batch 606/1011 - loss 3.30349820 - lr 0.0010 - time 126.55s
|
187 |
+
2024-07-30 07:01:24,635 batch 707/1011 - loss 3.31520849 - lr 0.0010 - time 148.33s
|
188 |
+
2024-07-30 07:01:46,227 batch 808/1011 - loss 3.32208459 - lr 0.0010 - time 169.92s
|
189 |
+
2024-07-30 07:02:07,447 batch 909/1011 - loss 3.33127639 - lr 0.0010 - time 191.15s
|
190 |
+
2024-07-30 07:02:27,987 batch 1010/1011 - loss 3.33636988 - lr 0.0010 - time 211.69s
|
191 |
+
2024-07-30 07:02:28,253 ----------------------------------------------------------------------------------------------------
|
192 |
+
2024-07-30 07:02:28,253 EPOCH 9 DONE
|
193 |
+
2024-07-30 07:02:38,971 TRAIN Loss: 3.3368
|
194 |
+
2024-07-30 07:02:38,971 DEV Loss: 5.2779
|
195 |
+
2024-07-30 07:02:38,971 DEV Perplexity: 195.9665
|
196 |
+
2024-07-30 07:02:38,971 No improvement for 3 epoch(s)
|
197 |
+
2024-07-30 07:02:38,971 ----------------------------------------------------------------------------------------------------
|
198 |
+
2024-07-30 07:02:38,971 EPOCH 10
|
199 |
+
2024-07-30 07:03:00,463 batch 101/1011 - loss 3.11399065 - lr 0.0010 - time 21.49s
|
200 |
+
2024-07-30 07:03:21,788 batch 202/1011 - loss 3.12331956 - lr 0.0010 - time 42.82s
|
201 |
+
2024-07-30 07:03:42,669 batch 303/1011 - loss 3.15361623 - lr 0.0010 - time 63.70s
|
202 |
+
2024-07-30 07:04:04,156 batch 404/1011 - loss 3.16798984 - lr 0.0010 - time 85.19s
|
203 |
+
2024-07-30 07:04:24,780 batch 505/1011 - loss 3.17598451 - lr 0.0010 - time 105.81s
|
204 |
+
2024-07-30 07:04:45,658 batch 606/1011 - loss 3.18800673 - lr 0.0010 - time 126.69s
|
205 |
+
2024-07-30 07:05:06,528 batch 707/1011 - loss 3.19686390 - lr 0.0010 - time 147.56s
|
206 |
+
2024-07-30 07:05:27,156 batch 808/1011 - loss 3.20963642 - lr 0.0010 - time 168.18s
|
207 |
+
2024-07-30 07:05:48,412 batch 909/1011 - loss 3.22162488 - lr 0.0010 - time 189.44s
|
208 |
+
2024-07-30 07:06:09,241 batch 1010/1011 - loss 3.22763753 - lr 0.0010 - time 210.27s
|
209 |
+
2024-07-30 07:06:09,522 ----------------------------------------------------------------------------------------------------
|
210 |
+
2024-07-30 07:06:09,522 EPOCH 10 DONE
|
211 |
+
2024-07-30 07:06:20,116 TRAIN Loss: 3.2275
|
212 |
+
2024-07-30 07:06:20,116 DEV Loss: 5.3749
|
213 |
+
2024-07-30 07:06:20,116 DEV Perplexity: 215.9214
|
214 |
+
2024-07-30 07:06:20,116 No improvement for 4 epoch(s)
|
215 |
+
2024-07-30 07:06:20,116 ----------------------------------------------------------------------------------------------------
|
216 |
+
2024-07-30 07:06:20,116 Finished Training
|
217 |
+
2024-07-30 07:06:41,395 TEST Perplexity: 175.6135
|
218 |
+
2024-07-30 07:10:06,670 TEST BLEU = 22.19 93.7/67.7/23.0/1.7 (BP = 1.000 ratio = 1.000 hyp_len = 63 ref_len = 63)
|
models/en2sv/word_end2end_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:147b79f92d6a67729079e2cfd7aab4feb2ee6d5fc9d15cdb14f21c0aa731afec
|
3 |
+
size 164807028
|
models/en2sv/word_word2vec_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-30 07:10:16,535 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-30 07:10:16,535 Training Model
|
3 |
+
2024-07-30 07:10:16,535 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-30 07:10:16,535 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(13968, 300, padding_idx=13963)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(21119, 300, padding_idx=21114)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=21119, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-30 07:10:16,535 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-30 07:10:16,535 Training Hyperparameters:
|
27 |
+
2024-07-30 07:10:16,535 - max_epochs: 10
|
28 |
+
2024-07-30 07:10:16,535 - learning_rate: 0.001
|
29 |
+
2024-07-30 07:10:16,535 - batch_size: 128
|
30 |
+
2024-07-30 07:10:16,535 - patience: 5
|
31 |
+
2024-07-30 07:10:16,535 - scheduler_patience: 3
|
32 |
+
2024-07-30 07:10:16,535 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-30 07:10:16,535 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-30 07:10:16,535 Computational Parameters:
|
35 |
+
2024-07-30 07:10:16,535 - num_workers: 4
|
36 |
+
2024-07-30 07:10:16,535 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-30 07:10:16,535 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-30 07:10:16,535 Dataset Splits:
|
39 |
+
2024-07-30 07:10:16,535 - train: 129388 data points
|
40 |
+
2024-07-30 07:10:16,535 - dev: 18485 data points
|
41 |
+
2024-07-30 07:10:16,535 - test: 36969 data points
|
42 |
+
2024-07-30 07:10:16,535 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-30 07:10:16,535 EPOCH 1
|
44 |
+
2024-07-30 07:10:32,991 batch 101/1011 - loss 6.50782995 - lr 0.0010 - time 16.46s
|
45 |
+
2024-07-30 07:10:49,984 batch 202/1011 - loss 6.17545159 - lr 0.0010 - time 33.45s
|
46 |
+
2024-07-30 07:11:06,465 batch 303/1011 - loss 5.94376483 - lr 0.0010 - time 49.93s
|
47 |
+
2024-07-30 07:11:23,192 batch 404/1011 - loss 5.73313509 - lr 0.0010 - time 66.66s
|
48 |
+
2024-07-30 07:11:39,578 batch 505/1011 - loss 5.55448845 - lr 0.0010 - time 83.04s
|
49 |
+
2024-07-30 07:11:56,136 batch 606/1011 - loss 5.39707022 - lr 0.0010 - time 99.60s
|
50 |
+
2024-07-30 07:12:12,315 batch 707/1011 - loss 5.26315545 - lr 0.0010 - time 115.78s
|
51 |
+
2024-07-30 07:12:28,774 batch 808/1011 - loss 5.13977151 - lr 0.0010 - time 132.24s
|
52 |
+
2024-07-30 07:12:45,154 batch 909/1011 - loss 5.03251663 - lr 0.0010 - time 148.62s
|
53 |
+
2024-07-30 07:13:01,519 batch 1010/1011 - loss 4.93861844 - lr 0.0010 - time 164.98s
|
54 |
+
2024-07-30 07:13:01,758 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-30 07:13:01,758 EPOCH 1 DONE
|
56 |
+
2024-07-30 07:13:10,010 TRAIN Loss: 4.9374
|
57 |
+
2024-07-30 07:13:10,011 DEV Loss: 5.5330
|
58 |
+
2024-07-30 07:13:10,011 DEV Perplexity: 252.9011
|
59 |
+
2024-07-30 07:13:10,011 New best score!
|
60 |
+
2024-07-30 07:13:10,012 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-30 07:13:10,012 EPOCH 2
|
62 |
+
2024-07-30 07:13:26,520 batch 101/1011 - loss 3.91592840 - lr 0.0010 - time 16.51s
|
63 |
+
2024-07-30 07:13:43,402 batch 202/1011 - loss 3.88949036 - lr 0.0010 - time 33.39s
|
64 |
+
2024-07-30 07:14:00,490 batch 303/1011 - loss 3.85380996 - lr 0.0010 - time 50.48s
|
65 |
+
2024-07-30 07:14:16,500 batch 404/1011 - loss 3.83005397 - lr 0.0010 - time 66.49s
|
66 |
+
2024-07-30 07:14:33,593 batch 505/1011 - loss 3.81018029 - lr 0.0010 - time 83.58s
|
67 |
+
2024-07-30 07:14:49,686 batch 606/1011 - loss 3.78961014 - lr 0.0010 - time 99.67s
|
68 |
+
2024-07-30 07:15:05,752 batch 707/1011 - loss 3.76683983 - lr 0.0010 - time 115.74s
|
69 |
+
2024-07-30 07:15:22,525 batch 808/1011 - loss 3.75370269 - lr 0.0010 - time 132.51s
|
70 |
+
2024-07-30 07:15:39,501 batch 909/1011 - loss 3.73538133 - lr 0.0010 - time 149.49s
|
71 |
+
2024-07-30 07:15:56,252 batch 1010/1011 - loss 3.71858650 - lr 0.0010 - time 166.24s
|
72 |
+
2024-07-30 07:15:56,559 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-30 07:15:56,559 EPOCH 2 DONE
|
74 |
+
2024-07-30 07:16:04,852 TRAIN Loss: 3.7185
|
75 |
+
2024-07-30 07:16:04,852 DEV Loss: 5.3348
|
76 |
+
2024-07-30 07:16:04,852 DEV Perplexity: 207.4382
|
77 |
+
2024-07-30 07:16:04,852 New best score!
|
78 |
+
2024-07-30 07:16:04,853 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-30 07:16:04,853 EPOCH 3
|
80 |
+
2024-07-30 07:16:21,161 batch 101/1011 - loss 3.44147283 - lr 0.0010 - time 16.31s
|
81 |
+
2024-07-30 07:16:37,507 batch 202/1011 - loss 3.43586244 - lr 0.0010 - time 32.65s
|
82 |
+
2024-07-30 07:16:55,176 batch 303/1011 - loss 3.42241322 - lr 0.0010 - time 50.32s
|
83 |
+
2024-07-30 07:17:11,551 batch 404/1011 - loss 3.40677823 - lr 0.0010 - time 66.70s
|
84 |
+
2024-07-30 07:17:28,122 batch 505/1011 - loss 3.40507821 - lr 0.0010 - time 83.27s
|
85 |
+
2024-07-30 07:17:44,546 batch 606/1011 - loss 3.39510626 - lr 0.0010 - time 99.69s
|
86 |
+
2024-07-30 07:18:01,353 batch 707/1011 - loss 3.39356401 - lr 0.0010 - time 116.50s
|
87 |
+
2024-07-30 07:18:17,985 batch 808/1011 - loss 3.38208187 - lr 0.0010 - time 133.13s
|
88 |
+
2024-07-30 07:18:35,051 batch 909/1011 - loss 3.37666082 - lr 0.0010 - time 150.20s
|
89 |
+
2024-07-30 07:18:51,489 batch 1010/1011 - loss 3.37086030 - lr 0.0010 - time 166.64s
|
90 |
+
2024-07-30 07:18:51,752 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-30 07:18:51,753 EPOCH 3 DONE
|
92 |
+
2024-07-30 07:19:00,175 TRAIN Loss: 3.3708
|
93 |
+
2024-07-30 07:19:00,175 DEV Loss: 5.3519
|
94 |
+
2024-07-30 07:19:00,175 DEV Perplexity: 211.0040
|
95 |
+
2024-07-30 07:19:00,175 No improvement for 1 epoch(s)
|
96 |
+
2024-07-30 07:19:00,175 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-30 07:19:00,175 EPOCH 4
|
98 |
+
2024-07-30 07:19:17,109 batch 101/1011 - loss 3.18789170 - lr 0.0010 - time 16.93s
|
99 |
+
2024-07-30 07:19:33,344 batch 202/1011 - loss 3.18578338 - lr 0.0010 - time 33.17s
|
100 |
+
2024-07-30 07:19:50,417 batch 303/1011 - loss 3.19206579 - lr 0.0010 - time 50.24s
|
101 |
+
2024-07-30 07:20:07,280 batch 404/1011 - loss 3.19076064 - lr 0.0010 - time 67.10s
|
102 |
+
2024-07-30 07:20:23,955 batch 505/1011 - loss 3.18914108 - lr 0.0010 - time 83.78s
|
103 |
+
2024-07-30 07:20:40,344 batch 606/1011 - loss 3.18379935 - lr 0.0010 - time 100.17s
|
104 |
+
2024-07-30 07:20:56,791 batch 707/1011 - loss 3.18416267 - lr 0.0010 - time 116.62s
|
105 |
+
2024-07-30 07:21:13,779 batch 808/1011 - loss 3.17604217 - lr 0.0010 - time 133.60s
|
106 |
+
2024-07-30 07:21:30,065 batch 909/1011 - loss 3.17482127 - lr 0.0010 - time 149.89s
|
107 |
+
2024-07-30 07:21:46,839 batch 1010/1011 - loss 3.17885105 - lr 0.0010 - time 166.66s
|
108 |
+
2024-07-30 07:21:47,072 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-30 07:21:47,072 EPOCH 4 DONE
|
110 |
+
2024-07-30 07:21:55,394 TRAIN Loss: 3.1792
|
111 |
+
2024-07-30 07:21:55,394 DEV Loss: 5.3165
|
112 |
+
2024-07-30 07:21:55,394 DEV Perplexity: 203.6758
|
113 |
+
2024-07-30 07:21:55,394 New best score!
|
114 |
+
2024-07-30 07:21:55,395 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-30 07:21:55,395 EPOCH 5
|
116 |
+
2024-07-30 07:22:12,524 batch 101/1011 - loss 3.02969086 - lr 0.0010 - time 17.13s
|
117 |
+
2024-07-30 07:22:28,603 batch 202/1011 - loss 3.01618758 - lr 0.0010 - time 33.21s
|
118 |
+
2024-07-30 07:22:45,369 batch 303/1011 - loss 3.02890778 - lr 0.0010 - time 49.97s
|
119 |
+
2024-07-30 07:23:01,887 batch 404/1011 - loss 3.02615853 - lr 0.0010 - time 66.49s
|
120 |
+
2024-07-30 07:23:18,321 batch 505/1011 - loss 3.02958812 - lr 0.0010 - time 82.93s
|
121 |
+
2024-07-30 07:23:34,574 batch 606/1011 - loss 3.02728296 - lr 0.0010 - time 99.18s
|
122 |
+
2024-07-30 07:23:51,298 batch 707/1011 - loss 3.03255639 - lr 0.0010 - time 115.90s
|
123 |
+
2024-07-30 07:24:08,373 batch 808/1011 - loss 3.03638340 - lr 0.0010 - time 132.98s
|
124 |
+
2024-07-30 07:24:25,062 batch 909/1011 - loss 3.03770205 - lr 0.0010 - time 149.67s
|
125 |
+
2024-07-30 07:24:42,329 batch 1010/1011 - loss 3.04199003 - lr 0.0010 - time 166.93s
|
126 |
+
2024-07-30 07:24:42,563 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-30 07:24:42,564 EPOCH 5 DONE
|
128 |
+
2024-07-30 07:24:50,961 TRAIN Loss: 3.0422
|
129 |
+
2024-07-30 07:24:50,961 DEV Loss: 5.4327
|
130 |
+
2024-07-30 07:24:50,961 DEV Perplexity: 228.7737
|
131 |
+
2024-07-30 07:24:50,961 No improvement for 1 epoch(s)
|
132 |
+
2024-07-30 07:24:50,961 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-30 07:24:50,961 EPOCH 6
|
134 |
+
2024-07-30 07:25:07,647 batch 101/1011 - loss 2.90331172 - lr 0.0010 - time 16.69s
|
135 |
+
2024-07-30 07:25:24,545 batch 202/1011 - loss 2.92709784 - lr 0.0010 - time 33.58s
|
136 |
+
2024-07-30 07:25:41,165 batch 303/1011 - loss 2.93089474 - lr 0.0010 - time 50.20s
|
137 |
+
2024-07-30 07:25:58,312 batch 404/1011 - loss 2.93767107 - lr 0.0010 - time 67.35s
|
138 |
+
2024-07-30 07:26:14,189 batch 505/1011 - loss 2.92998656 - lr 0.0010 - time 83.23s
|
139 |
+
2024-07-30 07:26:30,956 batch 606/1011 - loss 2.93550903 - lr 0.0010 - time 99.99s
|
140 |
+
2024-07-30 07:26:47,686 batch 707/1011 - loss 2.94189070 - lr 0.0010 - time 116.73s
|
141 |
+
2024-07-30 07:27:04,148 batch 808/1011 - loss 2.94585439 - lr 0.0010 - time 133.19s
|
142 |
+
2024-07-30 07:27:21,463 batch 909/1011 - loss 2.94453523 - lr 0.0010 - time 150.50s
|
143 |
+
2024-07-30 07:27:38,292 batch 1010/1011 - loss 2.94513276 - lr 0.0010 - time 167.33s
|
144 |
+
2024-07-30 07:27:38,531 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-30 07:27:38,531 EPOCH 6 DONE
|
146 |
+
2024-07-30 07:27:46,948 TRAIN Loss: 2.9452
|
147 |
+
2024-07-30 07:27:46,948 DEV Loss: 5.2722
|
148 |
+
2024-07-30 07:27:46,948 DEV Perplexity: 194.8435
|
149 |
+
2024-07-30 07:27:46,948 New best score!
|
150 |
+
2024-07-30 07:27:46,949 ----------------------------------------------------------------------------------------------------
|
151 |
+
2024-07-30 07:27:46,949 EPOCH 7
|
152 |
+
2024-07-30 07:28:03,466 batch 101/1011 - loss 2.81148494 - lr 0.0010 - time 16.52s
|
153 |
+
2024-07-30 07:28:20,221 batch 202/1011 - loss 2.81311450 - lr 0.0010 - time 33.27s
|
154 |
+
2024-07-30 07:28:37,215 batch 303/1011 - loss 2.84423899 - lr 0.0010 - time 50.27s
|
155 |
+
2024-07-30 07:28:54,166 batch 404/1011 - loss 2.85005298 - lr 0.0010 - time 67.22s
|
156 |
+
2024-07-30 07:29:10,460 batch 505/1011 - loss 2.85979048 - lr 0.0010 - time 83.51s
|
157 |
+
2024-07-30 07:29:27,066 batch 606/1011 - loss 2.86354838 - lr 0.0010 - time 100.12s
|
158 |
+
2024-07-30 07:29:44,198 batch 707/1011 - loss 2.86006280 - lr 0.0010 - time 117.25s
|
159 |
+
2024-07-30 07:30:01,257 batch 808/1011 - loss 2.86423382 - lr 0.0010 - time 134.31s
|
160 |
+
2024-07-30 07:30:18,073 batch 909/1011 - loss 2.86635932 - lr 0.0010 - time 151.12s
|
161 |
+
2024-07-30 07:30:34,802 batch 1010/1011 - loss 2.86598854 - lr 0.0010 - time 167.85s
|
162 |
+
2024-07-30 07:30:35,109 ----------------------------------------------------------------------------------------------------
|
163 |
+
2024-07-30 07:30:35,110 EPOCH 7 DONE
|
164 |
+
2024-07-30 07:30:43,677 TRAIN Loss: 2.8661
|
165 |
+
2024-07-30 07:30:43,678 DEV Loss: 5.4299
|
166 |
+
2024-07-30 07:30:43,678 DEV Perplexity: 228.1182
|
167 |
+
2024-07-30 07:30:43,678 No improvement for 1 epoch(s)
|
168 |
+
2024-07-30 07:30:43,678 ----------------------------------------------------------------------------------------------------
|
169 |
+
2024-07-30 07:30:43,678 EPOCH 8
|
170 |
+
2024-07-30 07:31:00,444 batch 101/1011 - loss 2.72034275 - lr 0.0010 - time 16.77s
|
171 |
+
2024-07-30 07:31:17,639 batch 202/1011 - loss 2.74599545 - lr 0.0010 - time 33.96s
|
172 |
+
2024-07-30 07:31:34,290 batch 303/1011 - loss 2.75750461 - lr 0.0010 - time 50.61s
|
173 |
+
2024-07-30 07:31:51,395 batch 404/1011 - loss 2.76567207 - lr 0.0010 - time 67.72s
|
174 |
+
2024-07-30 07:32:07,809 batch 505/1011 - loss 2.76154437 - lr 0.0010 - time 84.13s
|
175 |
+
2024-07-30 07:32:24,367 batch 606/1011 - loss 2.76728634 - lr 0.0010 - time 100.69s
|
176 |
+
2024-07-30 07:32:40,430 batch 707/1011 - loss 2.77496423 - lr 0.0010 - time 116.75s
|
177 |
+
2024-07-30 07:32:57,019 batch 808/1011 - loss 2.78151873 - lr 0.0010 - time 133.34s
|
178 |
+
2024-07-30 07:33:14,015 batch 909/1011 - loss 2.78925569 - lr 0.0010 - time 150.34s
|
179 |
+
2024-07-30 07:33:30,765 batch 1010/1011 - loss 2.79629478 - lr 0.0010 - time 167.09s
|
180 |
+
2024-07-30 07:33:30,992 ----------------------------------------------------------------------------------------------------
|
181 |
+
2024-07-30 07:33:30,993 EPOCH 8 DONE
|
182 |
+
2024-07-30 07:33:39,246 TRAIN Loss: 2.7968
|
183 |
+
2024-07-30 07:33:39,247 DEV Loss: 5.3468
|
184 |
+
2024-07-30 07:33:39,247 DEV Perplexity: 209.9432
|
185 |
+
2024-07-30 07:33:39,247 No improvement for 2 epoch(s)
|
186 |
+
2024-07-30 07:33:39,247 ----------------------------------------------------------------------------------------------------
|
187 |
+
2024-07-30 07:33:39,247 EPOCH 9
|
188 |
+
2024-07-30 07:33:56,016 batch 101/1011 - loss 2.68007979 - lr 0.0010 - time 16.77s
|
189 |
+
2024-07-30 07:34:12,343 batch 202/1011 - loss 2.70012787 - lr 0.0010 - time 33.10s
|
190 |
+
2024-07-30 07:34:28,790 batch 303/1011 - loss 2.70781142 - lr 0.0010 - time 49.54s
|
191 |
+
2024-07-30 07:34:45,942 batch 404/1011 - loss 2.71594009 - lr 0.0010 - time 66.69s
|
192 |
+
2024-07-30 07:35:02,646 batch 505/1011 - loss 2.72310795 - lr 0.0010 - time 83.40s
|
193 |
+
2024-07-30 07:35:19,165 batch 606/1011 - loss 2.72328499 - lr 0.0010 - time 99.92s
|
194 |
+
2024-07-30 07:35:36,017 batch 707/1011 - loss 2.73172001 - lr 0.0010 - time 116.77s
|
195 |
+
2024-07-30 07:35:52,856 batch 808/1011 - loss 2.73808224 - lr 0.0010 - time 133.61s
|
196 |
+
2024-07-30 07:36:09,783 batch 909/1011 - loss 2.74454487 - lr 0.0010 - time 150.54s
|
197 |
+
2024-07-30 07:36:27,116 batch 1010/1011 - loss 2.74490420 - lr 0.0010 - time 167.87s
|
198 |
+
2024-07-30 07:36:27,342 ----------------------------------------------------------------------------------------------------
|
199 |
+
2024-07-30 07:36:27,343 EPOCH 9 DONE
|
200 |
+
2024-07-30 07:36:35,636 TRAIN Loss: 2.7451
|
201 |
+
2024-07-30 07:36:35,637 DEV Loss: 5.2812
|
202 |
+
2024-07-30 07:36:35,637 DEV Perplexity: 196.6025
|
203 |
+
2024-07-30 07:36:35,637 No improvement for 3 epoch(s)
|
204 |
+
2024-07-30 07:36:35,637 ----------------------------------------------------------------------------------------------------
|
205 |
+
2024-07-30 07:36:35,637 EPOCH 10
|
206 |
+
2024-07-30 07:36:52,589 batch 101/1011 - loss 2.63776152 - lr 0.0010 - time 16.95s
|
207 |
+
2024-07-30 07:37:09,468 batch 202/1011 - loss 2.64122139 - lr 0.0010 - time 33.83s
|
208 |
+
2024-07-30 07:37:26,576 batch 303/1011 - loss 2.64583885 - lr 0.0010 - time 50.94s
|
209 |
+
2024-07-30 07:37:42,924 batch 404/1011 - loss 2.65515307 - lr 0.0010 - time 67.29s
|
210 |
+
2024-07-30 07:37:59,687 batch 505/1011 - loss 2.65460593 - lr 0.0010 - time 84.05s
|
211 |
+
2024-07-30 07:38:15,777 batch 606/1011 - loss 2.65854359 - lr 0.0010 - time 100.14s
|
212 |
+
2024-07-30 07:38:31,897 batch 707/1011 - loss 2.66482458 - lr 0.0010 - time 116.26s
|
213 |
+
2024-07-30 07:38:48,541 batch 808/1011 - loss 2.67462481 - lr 0.0010 - time 132.90s
|
214 |
+
2024-07-30 07:39:05,108 batch 909/1011 - loss 2.68112300 - lr 0.0010 - time 149.47s
|
215 |
+
2024-07-30 07:39:22,478 batch 1010/1011 - loss 2.68636602 - lr 0.0010 - time 166.84s
|
216 |
+
2024-07-30 07:39:22,729 ----------------------------------------------------------------------------------------------------
|
217 |
+
2024-07-30 07:39:22,730 EPOCH 10 DONE
|
218 |
+
2024-07-30 07:39:31,021 TRAIN Loss: 2.6868
|
219 |
+
2024-07-30 07:39:31,021 DEV Loss: 5.2771
|
220 |
+
2024-07-30 07:39:31,021 DEV Perplexity: 195.7993
|
221 |
+
2024-07-30 07:39:31,021 No improvement for 4 epoch(s)
|
222 |
+
2024-07-30 07:39:31,021 ----------------------------------------------------------------------------------------------------
|
223 |
+
2024-07-30 07:39:31,021 Finished Training
|
224 |
+
2024-07-30 07:39:47,476 TEST Perplexity: 197.5891
|
225 |
+
2024-07-30 07:49:48,025 TEST BLEU = 30.56 85.4/67.0/21.8/7.0 (BP = 1.000 ratio = 1.000 hyp_len = 89 ref_len = 89)
|
models/en2sv/word_word2vec_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f41fb8f0630b880eefe0d6aecb6d2f1ac5199dd6419e869fda3ee31870f1766
|
3 |
+
size 101929320
|
models/en2sv/word_word2vec_embeddings_without_attention/log.txt
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-30 07:49:57,719 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-30 07:49:57,719 Training Model
|
3 |
+
2024-07-30 07:49:57,719 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-30 07:49:57,719 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(13968, 300, padding_idx=13963)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(21119, 300, padding_idx=21114)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 1024, batch_first=True)
|
14 |
+
(hidden2vocab): Linear(in_features=1024, out_features=21119, bias=True)
|
15 |
+
(log_softmax): LogSoftmax(dim=-1)
|
16 |
+
)
|
17 |
+
)
|
18 |
+
2024-07-30 07:49:57,719 ----------------------------------------------------------------------------------------------------
|
19 |
+
2024-07-30 07:49:57,719 Training Hyperparameters:
|
20 |
+
2024-07-30 07:49:57,719 - max_epochs: 10
|
21 |
+
2024-07-30 07:49:57,719 - learning_rate: 0.001
|
22 |
+
2024-07-30 07:49:57,719 - batch_size: 128
|
23 |
+
2024-07-30 07:49:57,719 - patience: 5
|
24 |
+
2024-07-30 07:49:57,719 - scheduler_patience: 3
|
25 |
+
2024-07-30 07:49:57,720 - teacher_forcing_ratio: 0.5
|
26 |
+
2024-07-30 07:49:57,720 ----------------------------------------------------------------------------------------------------
|
27 |
+
2024-07-30 07:49:57,720 Computational Parameters:
|
28 |
+
2024-07-30 07:49:57,720 - num_workers: 4
|
29 |
+
2024-07-30 07:49:57,720 - device: device(type='cuda', index=0)
|
30 |
+
2024-07-30 07:49:57,720 ----------------------------------------------------------------------------------------------------
|
31 |
+
2024-07-30 07:49:57,720 Dataset Splits:
|
32 |
+
2024-07-30 07:49:57,720 - train: 129388 data points
|
33 |
+
2024-07-30 07:49:57,720 - dev: 18485 data points
|
34 |
+
2024-07-30 07:49:57,720 - test: 36969 data points
|
35 |
+
2024-07-30 07:49:57,720 ----------------------------------------------------------------------------------------------------
|
36 |
+
2024-07-30 07:49:57,720 EPOCH 1
|
37 |
+
2024-07-30 07:50:17,576 batch 101/1011 - loss 6.42171655 - lr 0.0010 - time 19.86s
|
38 |
+
2024-07-30 07:50:37,633 batch 202/1011 - loss 6.10246437 - lr 0.0010 - time 39.91s
|
39 |
+
2024-07-30 07:50:57,258 batch 303/1011 - loss 5.90702323 - lr 0.0010 - time 59.54s
|
40 |
+
2024-07-30 07:51:16,164 batch 404/1011 - loss 5.76726539 - lr 0.0010 - time 78.44s
|
41 |
+
2024-07-30 07:51:36,146 batch 505/1011 - loss 5.65202873 - lr 0.0010 - time 98.43s
|
42 |
+
2024-07-30 07:51:56,120 batch 606/1011 - loss 5.55423080 - lr 0.0010 - time 118.40s
|
43 |
+
2024-07-30 07:52:16,504 batch 707/1011 - loss 5.47423567 - lr 0.0010 - time 138.78s
|
44 |
+
2024-07-30 07:52:36,386 batch 808/1011 - loss 5.40244888 - lr 0.0010 - time 158.67s
|
45 |
+
2024-07-30 07:52:56,142 batch 909/1011 - loss 5.33781291 - lr 0.0010 - time 178.42s
|
46 |
+
2024-07-30 07:53:16,028 batch 1010/1011 - loss 5.28215873 - lr 0.0010 - time 198.31s
|
47 |
+
2024-07-30 07:53:16,322 ----------------------------------------------------------------------------------------------------
|
48 |
+
2024-07-30 07:53:16,322 EPOCH 1 DONE
|
49 |
+
2024-07-30 07:53:26,788 TRAIN Loss: 5.2816
|
50 |
+
2024-07-30 07:53:26,788 DEV Loss: 5.4936
|
51 |
+
2024-07-30 07:53:26,788 DEV Perplexity: 243.1199
|
52 |
+
2024-07-30 07:53:26,788 New best score!
|
53 |
+
2024-07-30 07:53:26,789 ----------------------------------------------------------------------------------------------------
|
54 |
+
2024-07-30 07:53:26,789 EPOCH 2
|
55 |
+
2024-07-30 07:53:46,961 batch 101/1011 - loss 4.59855071 - lr 0.0010 - time 20.17s
|
56 |
+
2024-07-30 07:54:06,655 batch 202/1011 - loss 4.60593028 - lr 0.0010 - time 39.87s
|
57 |
+
2024-07-30 07:54:26,708 batch 303/1011 - loss 4.58944668 - lr 0.0010 - time 59.92s
|
58 |
+
2024-07-30 07:54:46,791 batch 404/1011 - loss 4.57652512 - lr 0.0010 - time 80.00s
|
59 |
+
2024-07-30 07:55:06,302 batch 505/1011 - loss 4.55695529 - lr 0.0010 - time 99.51s
|
60 |
+
2024-07-30 07:55:25,835 batch 606/1011 - loss 4.54100491 - lr 0.0010 - time 119.05s
|
61 |
+
2024-07-30 07:55:45,895 batch 707/1011 - loss 4.52544479 - lr 0.0010 - time 139.11s
|
62 |
+
2024-07-30 07:56:05,867 batch 808/1011 - loss 4.51644398 - lr 0.0010 - time 159.08s
|
63 |
+
2024-07-30 07:56:25,697 batch 909/1011 - loss 4.50430690 - lr 0.0010 - time 178.91s
|
64 |
+
2024-07-30 07:56:45,147 batch 1010/1011 - loss 4.49178945 - lr 0.0010 - time 198.36s
|
65 |
+
2024-07-30 07:56:45,413 ----------------------------------------------------------------------------------------------------
|
66 |
+
2024-07-30 07:56:45,413 EPOCH 2 DONE
|
67 |
+
2024-07-30 07:56:56,014 TRAIN Loss: 4.4913
|
68 |
+
2024-07-30 07:56:56,014 DEV Loss: 5.2925
|
69 |
+
2024-07-30 07:56:56,015 DEV Perplexity: 198.8437
|
70 |
+
2024-07-30 07:56:56,015 New best score!
|
71 |
+
2024-07-30 07:56:56,015 ----------------------------------------------------------------------------------------------------
|
72 |
+
2024-07-30 07:56:56,015 EPOCH 3
|
73 |
+
2024-07-30 07:57:15,856 batch 101/1011 - loss 4.18290199 - lr 0.0010 - time 19.84s
|
74 |
+
2024-07-30 07:57:35,316 batch 202/1011 - loss 4.18722084 - lr 0.0010 - time 39.30s
|
75 |
+
2024-07-30 07:57:55,411 batch 303/1011 - loss 4.19086002 - lr 0.0010 - time 59.40s
|
76 |
+
2024-07-30 07:58:15,410 batch 404/1011 - loss 4.18455364 - lr 0.0010 - time 79.39s
|
77 |
+
2024-07-30 07:58:35,720 batch 505/1011 - loss 4.18117821 - lr 0.0010 - time 99.70s
|
78 |
+
2024-07-30 07:58:55,480 batch 606/1011 - loss 4.17456487 - lr 0.0010 - time 119.46s
|
79 |
+
2024-07-30 07:59:15,680 batch 707/1011 - loss 4.16952627 - lr 0.0010 - time 139.66s
|
80 |
+
2024-07-30 07:59:35,985 batch 808/1011 - loss 4.16762929 - lr 0.0010 - time 159.97s
|
81 |
+
2024-07-30 07:59:55,975 batch 909/1011 - loss 4.16291718 - lr 0.0010 - time 179.96s
|
82 |
+
2024-07-30 08:00:16,299 batch 1010/1011 - loss 4.16020582 - lr 0.0010 - time 200.28s
|
83 |
+
2024-07-30 08:00:16,570 ----------------------------------------------------------------------------------------------------
|
84 |
+
2024-07-30 08:00:16,570 EPOCH 3 DONE
|
85 |
+
2024-07-30 08:00:27,261 TRAIN Loss: 4.1603
|
86 |
+
2024-07-30 08:00:27,262 DEV Loss: 5.2196
|
87 |
+
2024-07-30 08:00:27,262 DEV Perplexity: 184.8564
|
88 |
+
2024-07-30 08:00:27,262 New best score!
|
89 |
+
2024-07-30 08:00:27,263 ----------------------------------------------------------------------------------------------------
|
90 |
+
2024-07-30 08:00:27,263 EPOCH 4
|
91 |
+
2024-07-30 08:00:46,754 batch 101/1011 - loss 3.90546866 - lr 0.0010 - time 19.49s
|
92 |
+
2024-07-30 08:01:06,774 batch 202/1011 - loss 3.91532035 - lr 0.0010 - time 39.51s
|
93 |
+
2024-07-30 08:01:27,064 batch 303/1011 - loss 3.91972694 - lr 0.0010 - time 59.80s
|
94 |
+
2024-07-30 08:01:47,109 batch 404/1011 - loss 3.92822919 - lr 0.0010 - time 79.85s
|
95 |
+
2024-07-30 08:02:07,497 batch 505/1011 - loss 3.93589169 - lr 0.0010 - time 100.23s
|
96 |
+
2024-07-30 08:02:27,467 batch 606/1011 - loss 3.93288693 - lr 0.0010 - time 120.20s
|
97 |
+
2024-07-30 08:02:47,536 batch 707/1011 - loss 3.93106809 - lr 0.0010 - time 140.27s
|
98 |
+
2024-07-30 08:03:07,726 batch 808/1011 - loss 3.92933554 - lr 0.0010 - time 160.46s
|
99 |
+
2024-07-30 08:03:28,108 batch 909/1011 - loss 3.93086966 - lr 0.0010 - time 180.85s
|
100 |
+
2024-07-30 08:03:48,469 batch 1010/1011 - loss 3.93176247 - lr 0.0010 - time 201.21s
|
101 |
+
2024-07-30 08:03:48,750 ----------------------------------------------------------------------------------------------------
|
102 |
+
2024-07-30 08:03:48,751 EPOCH 4 DONE
|
103 |
+
2024-07-30 08:03:59,395 TRAIN Loss: 3.9318
|
104 |
+
2024-07-30 08:03:59,395 DEV Loss: 5.1497
|
105 |
+
2024-07-30 08:03:59,395 DEV Perplexity: 172.3834
|
106 |
+
2024-07-30 08:03:59,395 New best score!
|
107 |
+
2024-07-30 08:03:59,396 ----------------------------------------------------------------------------------------------------
|
108 |
+
2024-07-30 08:03:59,396 EPOCH 5
|
109 |
+
2024-07-30 08:04:19,728 batch 101/1011 - loss 3.72514373 - lr 0.0010 - time 20.33s
|
110 |
+
2024-07-30 08:04:39,711 batch 202/1011 - loss 3.72623490 - lr 0.0010 - time 40.31s
|
111 |
+
2024-07-30 08:04:59,221 batch 303/1011 - loss 3.72435995 - lr 0.0010 - time 59.82s
|
112 |
+
2024-07-30 08:05:18,979 batch 404/1011 - loss 3.73085776 - lr 0.0010 - time 79.58s
|
113 |
+
2024-07-30 08:05:38,960 batch 505/1011 - loss 3.73777321 - lr 0.0010 - time 99.56s
|
114 |
+
2024-07-30 08:05:58,794 batch 606/1011 - loss 3.74417599 - lr 0.0010 - time 119.40s
|
115 |
+
2024-07-30 08:06:18,976 batch 707/1011 - loss 3.74912578 - lr 0.0010 - time 139.58s
|
116 |
+
2024-07-30 08:06:38,816 batch 808/1011 - loss 3.75504288 - lr 0.0010 - time 159.42s
|
117 |
+
2024-07-30 08:06:58,719 batch 909/1011 - loss 3.76028142 - lr 0.0010 - time 179.32s
|
118 |
+
2024-07-30 08:07:18,650 batch 1010/1011 - loss 3.76056113 - lr 0.0010 - time 199.25s
|
119 |
+
2024-07-30 08:07:18,919 ----------------------------------------------------------------------------------------------------
|
120 |
+
2024-07-30 08:07:18,919 EPOCH 5 DONE
|
121 |
+
2024-07-30 08:07:29,543 TRAIN Loss: 3.7602
|
122 |
+
2024-07-30 08:07:29,543 DEV Loss: 5.2120
|
123 |
+
2024-07-30 08:07:29,543 DEV Perplexity: 183.4567
|
124 |
+
2024-07-30 08:07:29,543 No improvement for 1 epoch(s)
|
125 |
+
2024-07-30 08:07:29,543 ----------------------------------------------------------------------------------------------------
|
126 |
+
2024-07-30 08:07:29,543 EPOCH 6
|
127 |
+
2024-07-30 08:07:50,053 batch 101/1011 - loss 3.55638360 - lr 0.0010 - time 20.51s
|
128 |
+
2024-07-30 08:08:10,198 batch 202/1011 - loss 3.56639723 - lr 0.0010 - time 40.65s
|
129 |
+
2024-07-30 08:08:29,721 batch 303/1011 - loss 3.57349216 - lr 0.0010 - time 60.18s
|
130 |
+
2024-07-30 08:08:49,934 batch 404/1011 - loss 3.58833846 - lr 0.0010 - time 80.39s
|
131 |
+
2024-07-30 08:09:09,435 batch 505/1011 - loss 3.58958746 - lr 0.0010 - time 99.89s
|
132 |
+
2024-07-30 08:09:30,097 batch 606/1011 - loss 3.60088717 - lr 0.0010 - time 120.55s
|
133 |
+
2024-07-30 08:09:49,981 batch 707/1011 - loss 3.60393567 - lr 0.0010 - time 140.44s
|
134 |
+
2024-07-30 08:10:09,031 batch 808/1011 - loss 3.60727072 - lr 0.0010 - time 159.49s
|
135 |
+
2024-07-30 08:10:28,815 batch 909/1011 - loss 3.61329824 - lr 0.0010 - time 179.27s
|
136 |
+
2024-07-30 08:10:49,206 batch 1010/1011 - loss 3.61771751 - lr 0.0010 - time 199.66s
|
137 |
+
2024-07-30 08:10:49,504 ----------------------------------------------------------------------------------------------------
|
138 |
+
2024-07-30 08:10:49,505 EPOCH 6 DONE
|
139 |
+
2024-07-30 08:11:00,137 TRAIN Loss: 3.6178
|
140 |
+
2024-07-30 08:11:00,137 DEV Loss: 5.1882
|
141 |
+
2024-07-30 08:11:00,137 DEV Perplexity: 179.1424
|
142 |
+
2024-07-30 08:11:00,137 No improvement for 2 epoch(s)
|
143 |
+
2024-07-30 08:11:00,137 ----------------------------------------------------------------------------------------------------
|
144 |
+
2024-07-30 08:11:00,137 EPOCH 7
|
145 |
+
2024-07-30 08:11:19,987 batch 101/1011 - loss 3.40094850 - lr 0.0010 - time 19.85s
|
146 |
+
2024-07-30 08:11:39,942 batch 202/1011 - loss 3.40086490 - lr 0.0010 - time 39.80s
|
147 |
+
2024-07-30 08:12:00,061 batch 303/1011 - loss 3.43740855 - lr 0.0010 - time 59.92s
|
148 |
+
2024-07-30 08:12:19,789 batch 404/1011 - loss 3.44924256 - lr 0.0010 - time 79.65s
|
149 |
+
2024-07-30 08:12:39,918 batch 505/1011 - loss 3.46568360 - lr 0.0010 - time 99.78s
|
150 |
+
2024-07-30 08:12:59,487 batch 606/1011 - loss 3.47250895 - lr 0.0010 - time 119.35s
|
151 |
+
2024-07-30 08:13:19,481 batch 707/1011 - loss 3.47430346 - lr 0.0010 - time 139.34s
|
152 |
+
2024-07-30 08:13:38,627 batch 808/1011 - loss 3.48243240 - lr 0.0010 - time 158.49s
|
153 |
+
2024-07-30 08:13:58,935 batch 909/1011 - loss 3.49017983 - lr 0.0010 - time 178.80s
|
154 |
+
2024-07-30 08:14:19,435 batch 1010/1011 - loss 3.49518054 - lr 0.0010 - time 199.30s
|
155 |
+
2024-07-30 08:14:19,741 ----------------------------------------------------------------------------------------------------
|
156 |
+
2024-07-30 08:14:19,741 EPOCH 7 DONE
|
157 |
+
2024-07-30 08:14:30,474 TRAIN Loss: 3.4954
|
158 |
+
2024-07-30 08:14:30,474 DEV Loss: 5.2243
|
159 |
+
2024-07-30 08:14:30,474 DEV Perplexity: 185.7337
|
160 |
+
2024-07-30 08:14:30,474 No improvement for 3 epoch(s)
|
161 |
+
2024-07-30 08:14:30,474 ----------------------------------------------------------------------------------------------------
|
162 |
+
2024-07-30 08:14:30,474 EPOCH 8
|
163 |
+
2024-07-30 08:14:50,644 batch 101/1011 - loss 3.29612942 - lr 0.0010 - time 20.17s
|
164 |
+
2024-07-30 08:15:10,779 batch 202/1011 - loss 3.31207690 - lr 0.0010 - time 40.30s
|
165 |
+
2024-07-30 08:15:30,663 batch 303/1011 - loss 3.32827004 - lr 0.0010 - time 60.19s
|
166 |
+
2024-07-30 08:15:50,835 batch 404/1011 - loss 3.33670160 - lr 0.0010 - time 80.36s
|
167 |
+
2024-07-30 08:16:10,256 batch 505/1011 - loss 3.34433788 - lr 0.0010 - time 99.78s
|
168 |
+
2024-07-30 08:16:30,266 batch 606/1011 - loss 3.35457159 - lr 0.0010 - time 119.79s
|
169 |
+
2024-07-30 08:16:50,763 batch 707/1011 - loss 3.36854262 - lr 0.0010 - time 140.29s
|
170 |
+
2024-07-30 08:17:10,306 batch 808/1011 - loss 3.37828197 - lr 0.0010 - time 159.83s
|
171 |
+
2024-07-30 08:17:31,116 batch 909/1011 - loss 3.39000203 - lr 0.0010 - time 180.64s
|
172 |
+
2024-07-30 08:17:51,225 batch 1010/1011 - loss 3.39904549 - lr 0.0010 - time 200.75s
|
173 |
+
2024-07-30 08:17:51,517 ----------------------------------------------------------------------------------------------------
|
174 |
+
2024-07-30 08:17:51,518 EPOCH 8 DONE
|
175 |
+
2024-07-30 08:18:02,187 TRAIN Loss: 3.3988
|
176 |
+
2024-07-30 08:18:02,188 DEV Loss: 5.1878
|
177 |
+
2024-07-30 08:18:02,188 DEV Perplexity: 179.0801
|
178 |
+
2024-07-30 08:18:02,188 No improvement for 4 epoch(s)
|
179 |
+
2024-07-30 08:18:02,188 ----------------------------------------------------------------------------------------------------
|
180 |
+
2024-07-30 08:18:02,188 EPOCH 9
|
181 |
+
2024-07-30 08:18:22,470 batch 101/1011 - loss 3.22171754 - lr 0.0001 - time 20.28s
|
182 |
+
2024-07-30 08:18:42,417 batch 202/1011 - loss 3.18771996 - lr 0.0001 - time 40.23s
|
183 |
+
2024-07-30 08:19:02,432 batch 303/1011 - loss 3.17878461 - lr 0.0001 - time 60.24s
|
184 |
+
2024-07-30 08:19:22,296 batch 404/1011 - loss 3.17053868 - lr 0.0001 - time 80.11s
|
185 |
+
2024-07-30 08:19:41,834 batch 505/1011 - loss 3.16197544 - lr 0.0001 - time 99.65s
|
186 |
+
2024-07-30 08:20:02,029 batch 606/1011 - loss 3.15925771 - lr 0.0001 - time 119.84s
|
187 |
+
2024-07-30 08:20:21,712 batch 707/1011 - loss 3.15666988 - lr 0.0001 - time 139.52s
|
188 |
+
2024-07-30 08:20:41,942 batch 808/1011 - loss 3.15549632 - lr 0.0001 - time 159.75s
|
189 |
+
2024-07-30 08:21:02,281 batch 909/1011 - loss 3.15326503 - lr 0.0001 - time 180.09s
|
190 |
+
2024-07-30 08:21:21,763 batch 1010/1011 - loss 3.15102374 - lr 0.0001 - time 199.58s
|
191 |
+
2024-07-30 08:21:22,007 ----------------------------------------------------------------------------------------------------
|
192 |
+
2024-07-30 08:21:22,007 EPOCH 9 DONE
|
193 |
+
2024-07-30 08:21:32,845 TRAIN Loss: 3.1511
|
194 |
+
2024-07-30 08:21:32,845 DEV Loss: 5.2150
|
195 |
+
2024-07-30 08:21:32,845 DEV Perplexity: 184.0093
|
196 |
+
2024-07-30 08:21:32,845 No improvement for 5 epoch(s)
|
197 |
+
2024-07-30 08:21:32,845 Patience reached: Terminating model training due to early stopping
|
198 |
+
2024-07-30 08:21:32,845 ----------------------------------------------------------------------------------------------------
|
199 |
+
2024-07-30 08:21:32,846 Finished Training
|
200 |
+
2024-07-30 08:21:54,124 TEST Perplexity: 171.7510
|
201 |
+
2024-07-30 08:24:42,314 TEST BLEU = 32.84 94.6/69.9/25.0/7.0 (BP = 1.000 ratio = 1.000 hyp_len = 74 ref_len = 74)
|
models/en2sv/word_word2vec_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ef56f7a090dbc43d865e84e0e4686b8bc74e99d4e6a7813f5c2e838c1a18bd4
|
3 |
+
size 164806772
|