Dmitry Chaplinsky
committed on
Commit
•
d111a77
1
Parent(s):
6a92397
Updated model: 551 splits, 19.68 epochs, min_loss: 1.0161, min_ppl: 2.7625
Browse files
- best-lm.pt +1 -1
- loss.txt +20 -0
best-lm.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 22791455
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e48444c28c499c5c9f695be8658a0859ffeac2471a31f5e93a452a7bd389feb0
|
3 |
size 22791455
|
loss.txt
CHANGED
@@ -529,3 +529,23 @@
|
|
529 |
| end of split 81 / 28 | epoch 17 | time: 3287.94s | valid loss 1.0162 | valid ppl 2.7627 | learning rate 0.3125
|
530 |
| end of split 82 / 28 | epoch 17 | time: 3278.46s | valid loss 1.0162 | valid ppl 2.7626 | learning rate 0.0781
|
531 |
| end of split 83 / 28 | epoch 17 | time: 3290.21s | valid loss 1.0162 | valid ppl 2.7626 | learning rate 0.0781
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
529 |
| end of split 81 / 28 | epoch 17 | time: 3287.94s | valid loss 1.0162 | valid ppl 2.7627 | learning rate 0.3125
|
530 |
| end of split 82 / 28 | epoch 17 | time: 3278.46s | valid loss 1.0162 | valid ppl 2.7626 | learning rate 0.0781
|
531 |
| end of split 83 / 28 | epoch 17 | time: 3290.21s | valid loss 1.0162 | valid ppl 2.7626 | learning rate 0.0781
|
532 |
+
| end of split 84 / 28 | epoch 17 | time: 3296.94s | valid loss 1.0162 | valid ppl 2.7626 | learning rate 0.0781
|
533 |
+
| end of split 85 / 28 | epoch 17 | time: 3201.69s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
534 |
+
| end of split 86 / 28 | epoch 17 | time: 26632.43s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
535 |
+
| end of split 87 / 28 | epoch 17 | time: 3289.17s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
536 |
+
| end of split 60 / 28 | epoch 18 | time: 3276.56s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
537 |
+
| end of split 61 / 28 | epoch 18 | time: 3295.10s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
538 |
+
| end of split 62 / 28 | epoch 18 | time: 3294.42s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
539 |
+
| end of split 63 / 28 | epoch 18 | time: 3260.23s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
540 |
+
| end of split 64 / 28 | epoch 18 | time: 3260.96s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
541 |
+
| end of split 65 / 28 | epoch 18 | time: 3265.06s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0781
|
542 |
+
| end of split 66 / 28 | epoch 18 | time: 3269.36s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
543 |
+
| end of split 67 / 28 | epoch 18 | time: 3269.99s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
544 |
+
| end of split 68 / 28 | epoch 18 | time: 3267.74s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
545 |
+
| end of split 69 / 28 | epoch 18 | time: 3276.93s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
546 |
+
| end of split 70 / 28 | epoch 18 | time: 3280.38s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
547 |
+
| end of split 71 / 28 | epoch 18 | time: 3281.48s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
548 |
+
| end of split 72 / 28 | epoch 18 | time: 3269.65s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
549 |
+
| end of split 73 / 28 | epoch 18 | time: 3276.15s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
550 |
+
| end of split 74 / 28 | epoch 18 | time: 3286.28s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|
551 |
+
| end of split 75 / 28 | epoch 18 | time: 3286.31s | valid loss 1.0161 | valid ppl 2.7625 | learning rate 0.0195
|