Dmitry Chaplinsky
committed on
Commit
•
e827347
1
Parent(s):
574cad4
More iterations
Browse files
- best-lm.pt +1 -1
- loss.txt +36 -0
best-lm.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 22791455
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7e4ef8ab3dde9cbe77d453be3485e6d168382b39e7c2d0ebfeea1120ecbd931
|
3 |
size 22791455
|
loss.txt
CHANGED
@@ -386,3 +386,39 @@
|
|
386 |
| end of split 50 / 28 | epoch 13 | time: 3234.34s | valid loss 1.0193 | valid ppl 2.7713 | learning rate 5.0000
|
387 |
| end of split 51 / 28 | epoch 13 | time: 3244.24s | valid loss 1.0192 | valid ppl 2.7710 | learning rate 5.0000
|
388 |
| end of split 52 / 28 | epoch 13 | time: 3244.31s | valid loss 1.0192 | valid ppl 2.7710 | learning rate 5.0000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
386 |
| end of split 50 / 28 | epoch 13 | time: 3234.34s | valid loss 1.0193 | valid ppl 2.7713 | learning rate 5.0000
|
387 |
| end of split 51 / 28 | epoch 13 | time: 3244.24s | valid loss 1.0192 | valid ppl 2.7710 | learning rate 5.0000
|
388 |
| end of split 52 / 28 | epoch 13 | time: 3244.31s | valid loss 1.0192 | valid ppl 2.7710 | learning rate 5.0000
|
389 |
+
| end of split 53 / 28 | epoch 13 | time: 3242.26s | valid loss 1.0198 | valid ppl 2.7727 | learning rate 5.0000
|
390 |
+
| end of split 54 / 28 | epoch 13 | time: 3242.91s | valid loss 1.0192 | valid ppl 2.7710 | learning rate 5.0000
|
391 |
+
| end of split 55 / 28 | epoch 13 | time: 3244.44s | valid loss 1.0191 | valid ppl 2.7707 | learning rate 5.0000
|
392 |
+
| end of split 56 / 28 | epoch 13 | time: 3242.99s | valid loss 1.0191 | valid ppl 2.7707 | learning rate 5.0000
|
393 |
+
| end of split 57 / 28 | epoch 13 | time: 3246.09s | valid loss 1.0191 | valid ppl 2.7707 | learning rate 5.0000
|
394 |
+
| end of split 58 / 28 | epoch 13 | time: 3234.45s | valid loss 1.0191 | valid ppl 2.7706 | learning rate 5.0000
|
395 |
+
| end of split 59 / 28 | epoch 13 | time: 3234.01s | valid loss 1.0192 | valid ppl 2.7708 | learning rate 5.0000
|
396 |
+
| end of split 60 / 28 | epoch 13 | time: 3232.69s | valid loss 1.0190 | valid ppl 2.7705 | learning rate 5.0000
|
397 |
+
| end of split 61 / 28 | epoch 13 | time: 3242.40s | valid loss 1.0193 | valid ppl 2.7712 | learning rate 5.0000
|
398 |
+
| end of split 62 / 28 | epoch 13 | time: 3241.93s | valid loss 1.0190 | valid ppl 2.7704 | learning rate 5.0000
|
399 |
+
| end of split 63 / 28 | epoch 13 | time: 3246.64s | valid loss 1.0191 | valid ppl 2.7706 | learning rate 5.0000
|
400 |
+
| end of split 64 / 28 | epoch 13 | time: 3245.70s | valid loss 1.0190 | valid ppl 2.7703 | learning rate 5.0000
|
401 |
+
| end of split 65 / 28 | epoch 13 | time: 3245.13s | valid loss 1.0189 | valid ppl 2.7701 | learning rate 5.0000
|
402 |
+
| end of split 66 / 28 | epoch 13 | time: 3243.63s | valid loss 1.0189 | valid ppl 2.7702 | learning rate 5.0000
|
403 |
+
| end of split 67 / 28 | epoch 13 | time: 3251.70s | valid loss 1.0191 | valid ppl 2.7707 | learning rate 5.0000
|
404 |
+
| end of split 68 / 28 | epoch 13 | time: 3249.44s | valid loss 1.0192 | valid ppl 2.7710 | learning rate 5.0000
|
405 |
+
| end of split 69 / 28 | epoch 13 | time: 3258.90s | valid loss 1.0189 | valid ppl 2.7701 | learning rate 5.0000
|
406 |
+
| end of split 70 / 28 | epoch 13 | time: 3259.22s | valid loss 1.0189 | valid ppl 2.7701 | learning rate 5.0000
|
407 |
+
| end of split 71 / 28 | epoch 13 | time: 3262.28s | valid loss 1.0189 | valid ppl 2.7701 | learning rate 5.0000
|
408 |
+
| end of split 72 / 28 | epoch 13 | time: 3261.10s | valid loss 1.0188 | valid ppl 2.7699 | learning rate 5.0000
|
409 |
+
| end of split 73 / 28 | epoch 13 | time: 3295.78s | valid loss 1.0188 | valid ppl 2.7699 | learning rate 5.0000
|
410 |
+
| end of split 74 / 28 | epoch 13 | time: 3298.17s | valid loss 1.0188 | valid ppl 2.7698 | learning rate 5.0000
|
411 |
+
| end of split 75 / 28 | epoch 13 | time: 3297.19s | valid loss 1.0189 | valid ppl 2.7701 | learning rate 5.0000
|
412 |
+
| end of split 76 / 28 | epoch 13 | time: 3294.64s | valid loss 1.0187 | valid ppl 2.7697 | learning rate 5.0000
|
413 |
+
| end of split 49 / 28 | epoch 14 | time: 3258.86s | valid loss 1.0188 | valid ppl 2.7697 | learning rate 5.0000
|
414 |
+
| end of split 50 / 28 | epoch 14 | time: 3284.30s | valid loss 1.0188 | valid ppl 2.7700 | learning rate 5.0000
|
415 |
+
| end of split 51 / 28 | epoch 14 | time: 3280.96s | valid loss 1.0187 | valid ppl 2.7696 | learning rate 5.0000
|
416 |
+
| end of split 52 / 28 | epoch 14 | time: 964.31s | valid loss 1.0187 | valid ppl 2.7695 | learning rate 5.0000
|
417 |
+
| end of split 53 / 28 | epoch 14 | time: 3306.49s | valid loss 1.0187 | valid ppl 2.7697 | learning rate 5.0000
|
418 |
+
| end of split 54 / 28 | epoch 14 | time: 3241.31s | valid loss 1.0186 | valid ppl 2.7694 | learning rate 5.0000
|
419 |
+
| end of split 55 / 28 | epoch 14 | time: 3289.83s | valid loss 1.0186 | valid ppl 2.7694 | learning rate 5.0000
|
420 |
+
| end of split 56 / 28 | epoch 14 | time: 3329.41s | valid loss 1.0187 | valid ppl 2.7696 | learning rate 5.0000
|
421 |
+
| end of split 57 / 28 | epoch 14 | time: 3305.55s | valid loss 1.0186 | valid ppl 2.7694 | learning rate 5.0000
|
422 |
+
| end of split 58 / 28 | epoch 14 | time: 3330.67s | valid loss 1.0187 | valid ppl 2.7696 | learning rate 5.0000
|
423 |
+
| end of split 59 / 28 | epoch 14 | time: 3292.81s | valid loss 1.0186 | valid ppl 2.7692 | learning rate 5.0000
|
424 |
+
| end of split 60 / 28 | epoch 14 | time: 3264.46s | valid loss 1.0185 | valid ppl 2.7691 | learning rate 5.0000
|