gsmyrnis commited on
Commit
c0bbdba
·
verified ·
1 Parent(s): ee79687

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c92a6b45d7ac3eda4d6a3f792a77acc7a755970c91bfbe9d685c7f9d45f352b
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6c5857dd8d7bf751a51ed832110e144c05d1252501eb3c516edd411e0e880da
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff90183ae0b5fc84434339e332e079e5ede0c54f737da4089ddbd1e8e745a807
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10ce8bcabaa7c5d3d48e7527852f2cd42e57c92af3b1e34043cfe5b2af29d03a
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba91d8763b9c8aa1f84a3c10c0732e5b617f9ca7b89846d929cafb7b0cf05aee
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd8615d3e81eaf46656f678ae4d9564af7bb3df87edacbad8367720d07bb409
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16952570da61b9b560669fbded6f89506195798f25ccde87f6eb1834e9fa14bf
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1436cafb42c114e82eae1e28fc1e9494069b4b291756f9c9fadbddbf6d48e4a
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,117 +1,69 @@
1
- {"current_steps": 10, "total_steps": 1134, "loss": 1.0158, "lr": 5e-06, "epoch": 0.026455026455026454, "percentage": 0.88, "elapsed_time": "0:00:59", "remaining_time": "1:51:27"}
2
- {"current_steps": 20, "total_steps": 1134, "loss": 0.9159, "lr": 5e-06, "epoch": 0.05291005291005291, "percentage": 1.76, "elapsed_time": "0:01:58", "remaining_time": "1:49:53"}
3
- {"current_steps": 30, "total_steps": 1134, "loss": 0.884, "lr": 5e-06, "epoch": 0.07936507936507936, "percentage": 2.65, "elapsed_time": "0:02:58", "remaining_time": "1:49:25"}
4
- {"current_steps": 40, "total_steps": 1134, "loss": 0.8694, "lr": 5e-06, "epoch": 0.10582010582010581, "percentage": 3.53, "elapsed_time": "0:03:57", "remaining_time": "1:48:17"}
5
- {"current_steps": 50, "total_steps": 1134, "loss": 0.8552, "lr": 5e-06, "epoch": 0.13227513227513227, "percentage": 4.41, "elapsed_time": "0:04:56", "remaining_time": "1:47:10"}
6
- {"current_steps": 60, "total_steps": 1134, "loss": 0.8482, "lr": 5e-06, "epoch": 0.15873015873015872, "percentage": 5.29, "elapsed_time": "0:05:55", "remaining_time": "1:46:08"}
7
- {"current_steps": 70, "total_steps": 1134, "loss": 0.8404, "lr": 5e-06, "epoch": 0.18518518518518517, "percentage": 6.17, "elapsed_time": "0:06:55", "remaining_time": "1:45:19"}
8
- {"current_steps": 80, "total_steps": 1134, "loss": 0.8326, "lr": 5e-06, "epoch": 0.21164021164021163, "percentage": 7.05, "elapsed_time": "0:07:54", "remaining_time": "1:44:16"}
9
- {"current_steps": 90, "total_steps": 1134, "loss": 0.8317, "lr": 5e-06, "epoch": 0.23809523809523808, "percentage": 7.94, "elapsed_time": "0:08:53", "remaining_time": "1:43:10"}
10
- {"current_steps": 100, "total_steps": 1134, "loss": 0.8224, "lr": 5e-06, "epoch": 0.26455026455026454, "percentage": 8.82, "elapsed_time": "0:09:53", "remaining_time": "1:42:17"}
11
- {"current_steps": 110, "total_steps": 1134, "loss": 0.8182, "lr": 5e-06, "epoch": 0.291005291005291, "percentage": 9.7, "elapsed_time": "0:10:52", "remaining_time": "1:41:13"}
12
- {"current_steps": 120, "total_steps": 1134, "loss": 0.8175, "lr": 5e-06, "epoch": 0.31746031746031744, "percentage": 10.58, "elapsed_time": "0:11:51", "remaining_time": "1:40:10"}
13
- {"current_steps": 130, "total_steps": 1134, "loss": 0.8108, "lr": 5e-06, "epoch": 0.3439153439153439, "percentage": 11.46, "elapsed_time": "0:12:51", "remaining_time": "1:39:18"}
14
- {"current_steps": 140, "total_steps": 1134, "loss": 0.8103, "lr": 5e-06, "epoch": 0.37037037037037035, "percentage": 12.35, "elapsed_time": "0:13:50", "remaining_time": "1:38:18"}
15
- {"current_steps": 150, "total_steps": 1134, "loss": 0.813, "lr": 5e-06, "epoch": 0.3968253968253968, "percentage": 13.23, "elapsed_time": "0:14:49", "remaining_time": "1:37:17"}
16
- {"current_steps": 160, "total_steps": 1134, "loss": 0.8054, "lr": 5e-06, "epoch": 0.42328042328042326, "percentage": 14.11, "elapsed_time": "0:15:48", "remaining_time": "1:36:15"}
17
- {"current_steps": 170, "total_steps": 1134, "loss": 0.8092, "lr": 5e-06, "epoch": 0.4497354497354497, "percentage": 14.99, "elapsed_time": "0:16:48", "remaining_time": "1:35:16"}
18
- {"current_steps": 180, "total_steps": 1134, "loss": 0.8069, "lr": 5e-06, "epoch": 0.47619047619047616, "percentage": 15.87, "elapsed_time": "0:17:47", "remaining_time": "1:34:17"}
19
- {"current_steps": 190, "total_steps": 1134, "loss": 0.8031, "lr": 5e-06, "epoch": 0.5026455026455027, "percentage": 16.75, "elapsed_time": "0:18:46", "remaining_time": "1:33:15"}
20
- {"current_steps": 200, "total_steps": 1134, "loss": 0.7987, "lr": 5e-06, "epoch": 0.5291005291005291, "percentage": 17.64, "elapsed_time": "0:19:46", "remaining_time": "1:32:20"}
21
- {"current_steps": 210, "total_steps": 1134, "loss": 0.8005, "lr": 5e-06, "epoch": 0.5555555555555556, "percentage": 18.52, "elapsed_time": "0:20:45", "remaining_time": "1:31:19"}
22
- {"current_steps": 220, "total_steps": 1134, "loss": 0.7949, "lr": 5e-06, "epoch": 0.582010582010582, "percentage": 19.4, "elapsed_time": "0:21:44", "remaining_time": "1:30:18"}
23
- {"current_steps": 230, "total_steps": 1134, "loss": 0.7917, "lr": 5e-06, "epoch": 0.6084656084656085, "percentage": 20.28, "elapsed_time": "0:22:43", "remaining_time": "1:29:19"}
24
- {"current_steps": 240, "total_steps": 1134, "loss": 0.7954, "lr": 5e-06, "epoch": 0.6349206349206349, "percentage": 21.16, "elapsed_time": "0:23:43", "remaining_time": "1:28:23"}
25
- {"current_steps": 250, "total_steps": 1134, "loss": 0.792, "lr": 5e-06, "epoch": 0.6613756613756614, "percentage": 22.05, "elapsed_time": "0:24:42", "remaining_time": "1:27:22"}
26
- {"current_steps": 260, "total_steps": 1134, "loss": 0.7948, "lr": 5e-06, "epoch": 0.6878306878306878, "percentage": 22.93, "elapsed_time": "0:25:41", "remaining_time": "1:26:20"}
27
- {"current_steps": 270, "total_steps": 1134, "loss": 0.794, "lr": 5e-06, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "0:26:40", "remaining_time": "1:25:22"}
28
- {"current_steps": 280, "total_steps": 1134, "loss": 0.7946, "lr": 5e-06, "epoch": 0.7407407407407407, "percentage": 24.69, "elapsed_time": "0:27:40", "remaining_time": "1:24:24"}
29
- {"current_steps": 290, "total_steps": 1134, "loss": 0.7923, "lr": 5e-06, "epoch": 0.7671957671957672, "percentage": 25.57, "elapsed_time": "0:28:39", "remaining_time": "1:23:24"}
30
- {"current_steps": 300, "total_steps": 1134, "loss": 0.7886, "lr": 5e-06, "epoch": 0.7936507936507936, "percentage": 26.46, "elapsed_time": "0:29:38", "remaining_time": "1:22:23"}
31
- {"current_steps": 310, "total_steps": 1134, "loss": 0.7855, "lr": 5e-06, "epoch": 0.8201058201058201, "percentage": 27.34, "elapsed_time": "0:30:38", "remaining_time": "1:21:26"}
32
- {"current_steps": 320, "total_steps": 1134, "loss": 0.789, "lr": 5e-06, "epoch": 0.8465608465608465, "percentage": 28.22, "elapsed_time": "0:31:37", "remaining_time": "1:20:27"}
33
- {"current_steps": 330, "total_steps": 1134, "loss": 0.7837, "lr": 5e-06, "epoch": 0.873015873015873, "percentage": 29.1, "elapsed_time": "0:32:36", "remaining_time": "1:19:27"}
34
- {"current_steps": 340, "total_steps": 1134, "loss": 0.7894, "lr": 5e-06, "epoch": 0.8994708994708994, "percentage": 29.98, "elapsed_time": "0:33:35", "remaining_time": "1:18:27"}
35
- {"current_steps": 350, "total_steps": 1134, "loss": 0.7921, "lr": 5e-06, "epoch": 0.9259259259259259, "percentage": 30.86, "elapsed_time": "0:34:35", "remaining_time": "1:17:29"}
36
- {"current_steps": 360, "total_steps": 1134, "loss": 0.7868, "lr": 5e-06, "epoch": 0.9523809523809523, "percentage": 31.75, "elapsed_time": "0:35:34", "remaining_time": "1:16:29"}
37
- {"current_steps": 370, "total_steps": 1134, "loss": 0.7841, "lr": 5e-06, "epoch": 0.9788359788359788, "percentage": 32.63, "elapsed_time": "0:36:33", "remaining_time": "1:15:29"}
38
- {"current_steps": 378, "total_steps": 1134, "eval_loss": 0.7856976389884949, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:37:57", "remaining_time": "1:15:55"}
39
- {"current_steps": 380, "total_steps": 1134, "loss": 0.7788, "lr": 5e-06, "epoch": 1.0052910052910053, "percentage": 33.51, "elapsed_time": "0:39:39", "remaining_time": "1:18:41"}
40
- {"current_steps": 390, "total_steps": 1134, "loss": 0.7528, "lr": 5e-06, "epoch": 1.0317460317460316, "percentage": 34.39, "elapsed_time": "0:40:38", "remaining_time": "1:17:31"}
41
- {"current_steps": 400, "total_steps": 1134, "loss": 0.7514, "lr": 5e-06, "epoch": 1.0582010582010581, "percentage": 35.27, "elapsed_time": "0:41:36", "remaining_time": "1:16:21"}
42
- {"current_steps": 410, "total_steps": 1134, "loss": 0.7539, "lr": 5e-06, "epoch": 1.0846560846560847, "percentage": 36.16, "elapsed_time": "0:42:35", "remaining_time": "1:15:12"}
43
- {"current_steps": 420, "total_steps": 1134, "loss": 0.7536, "lr": 5e-06, "epoch": 1.1111111111111112, "percentage": 37.04, "elapsed_time": "0:43:34", "remaining_time": "1:14:04"}
44
- {"current_steps": 430, "total_steps": 1134, "loss": 0.7508, "lr": 5e-06, "epoch": 1.1375661375661377, "percentage": 37.92, "elapsed_time": "0:44:34", "remaining_time": "1:12:58"}
45
- {"current_steps": 440, "total_steps": 1134, "loss": 0.7546, "lr": 5e-06, "epoch": 1.164021164021164, "percentage": 38.8, "elapsed_time": "0:45:33", "remaining_time": "1:11:51"}
46
- {"current_steps": 450, "total_steps": 1134, "loss": 0.7528, "lr": 5e-06, "epoch": 1.1904761904761905, "percentage": 39.68, "elapsed_time": "0:46:32", "remaining_time": "1:10:44"}
47
- {"current_steps": 460, "total_steps": 1134, "loss": 0.7534, "lr": 5e-06, "epoch": 1.216931216931217, "percentage": 40.56, "elapsed_time": "0:47:32", "remaining_time": "1:09:38"}
48
- {"current_steps": 470, "total_steps": 1134, "loss": 0.7512, "lr": 5e-06, "epoch": 1.2433862433862433, "percentage": 41.45, "elapsed_time": "0:48:31", "remaining_time": "1:08:33"}
49
- {"current_steps": 480, "total_steps": 1134, "loss": 0.7537, "lr": 5e-06, "epoch": 1.2698412698412698, "percentage": 42.33, "elapsed_time": "0:49:30", "remaining_time": "1:07:26"}
50
- {"current_steps": 490, "total_steps": 1134, "loss": 0.7532, "lr": 5e-06, "epoch": 1.2962962962962963, "percentage": 43.21, "elapsed_time": "0:50:29", "remaining_time": "1:06:22"}
51
- {"current_steps": 500, "total_steps": 1134, "loss": 0.75, "lr": 5e-06, "epoch": 1.3227513227513228, "percentage": 44.09, "elapsed_time": "0:51:29", "remaining_time": "1:05:17"}
52
- {"current_steps": 510, "total_steps": 1134, "loss": 0.7476, "lr": 5e-06, "epoch": 1.3492063492063493, "percentage": 44.97, "elapsed_time": "0:52:28", "remaining_time": "1:04:12"}
53
- {"current_steps": 520, "total_steps": 1134, "loss": 0.7506, "lr": 5e-06, "epoch": 1.3756613756613756, "percentage": 45.86, "elapsed_time": "0:53:27", "remaining_time": "1:03:07"}
54
- {"current_steps": 530, "total_steps": 1134, "loss": 0.7531, "lr": 5e-06, "epoch": 1.402116402116402, "percentage": 46.74, "elapsed_time": "0:54:27", "remaining_time": "1:02:03"}
55
- {"current_steps": 540, "total_steps": 1134, "loss": 0.7499, "lr": 5e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "0:55:26", "remaining_time": "1:00:58"}
56
- {"current_steps": 550, "total_steps": 1134, "loss": 0.7508, "lr": 5e-06, "epoch": 1.455026455026455, "percentage": 48.5, "elapsed_time": "0:56:25", "remaining_time": "0:59:54"}
57
- {"current_steps": 560, "total_steps": 1134, "loss": 0.7531, "lr": 5e-06, "epoch": 1.4814814814814814, "percentage": 49.38, "elapsed_time": "0:57:25", "remaining_time": "0:58:51"}
58
- {"current_steps": 570, "total_steps": 1134, "loss": 0.7514, "lr": 5e-06, "epoch": 1.507936507936508, "percentage": 50.26, "elapsed_time": "0:58:23", "remaining_time": "0:57:47"}
59
- {"current_steps": 580, "total_steps": 1134, "loss": 0.7506, "lr": 5e-06, "epoch": 1.5343915343915344, "percentage": 51.15, "elapsed_time": "0:59:23", "remaining_time": "0:56:43"}
60
- {"current_steps": 590, "total_steps": 1134, "loss": 0.7498, "lr": 5e-06, "epoch": 1.560846560846561, "percentage": 52.03, "elapsed_time": "1:00:22", "remaining_time": "0:55:40"}
61
- {"current_steps": 600, "total_steps": 1134, "loss": 0.7515, "lr": 5e-06, "epoch": 1.5873015873015874, "percentage": 52.91, "elapsed_time": "1:01:21", "remaining_time": "0:54:36"}
62
- {"current_steps": 610, "total_steps": 1134, "loss": 0.7518, "lr": 5e-06, "epoch": 1.6137566137566137, "percentage": 53.79, "elapsed_time": "1:02:21", "remaining_time": "0:53:33"}
63
- {"current_steps": 620, "total_steps": 1134, "loss": 0.7461, "lr": 5e-06, "epoch": 1.6402116402116402, "percentage": 54.67, "elapsed_time": "1:03:21", "remaining_time": "0:52:31"}
64
- {"current_steps": 630, "total_steps": 1134, "loss": 0.7478, "lr": 5e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "1:04:20", "remaining_time": "0:51:28"}
65
- {"current_steps": 640, "total_steps": 1134, "loss": 0.7486, "lr": 5e-06, "epoch": 1.693121693121693, "percentage": 56.44, "elapsed_time": "1:05:19", "remaining_time": "0:50:25"}
66
- {"current_steps": 650, "total_steps": 1134, "loss": 0.75, "lr": 5e-06, "epoch": 1.7195767195767195, "percentage": 57.32, "elapsed_time": "1:06:18", "remaining_time": "0:49:22"}
67
- {"current_steps": 660, "total_steps": 1134, "loss": 0.7494, "lr": 5e-06, "epoch": 1.746031746031746, "percentage": 58.2, "elapsed_time": "1:07:18", "remaining_time": "0:48:20"}
68
- {"current_steps": 670, "total_steps": 1134, "loss": 0.7491, "lr": 5e-06, "epoch": 1.7724867724867726, "percentage": 59.08, "elapsed_time": "1:08:17", "remaining_time": "0:47:17"}
69
- {"current_steps": 680, "total_steps": 1134, "loss": 0.7432, "lr": 5e-06, "epoch": 1.798941798941799, "percentage": 59.96, "elapsed_time": "1:09:16", "remaining_time": "0:46:14"}
70
- {"current_steps": 690, "total_steps": 1134, "loss": 0.7488, "lr": 5e-06, "epoch": 1.8253968253968254, "percentage": 60.85, "elapsed_time": "1:10:15", "remaining_time": "0:45:12"}
71
- {"current_steps": 700, "total_steps": 1134, "loss": 0.7434, "lr": 5e-06, "epoch": 1.8518518518518519, "percentage": 61.73, "elapsed_time": "1:11:14", "remaining_time": "0:44:10"}
72
- {"current_steps": 710, "total_steps": 1134, "loss": 0.7475, "lr": 5e-06, "epoch": 1.8783068783068781, "percentage": 62.61, "elapsed_time": "1:12:13", "remaining_time": "0:43:08"}
73
- {"current_steps": 720, "total_steps": 1134, "loss": 0.7418, "lr": 5e-06, "epoch": 1.9047619047619047, "percentage": 63.49, "elapsed_time": "1:13:13", "remaining_time": "0:42:06"}
74
- {"current_steps": 730, "total_steps": 1134, "loss": 0.7451, "lr": 5e-06, "epoch": 1.9312169312169312, "percentage": 64.37, "elapsed_time": "1:14:12", "remaining_time": "0:41:04"}
75
- {"current_steps": 740, "total_steps": 1134, "loss": 0.7466, "lr": 5e-06, "epoch": 1.9576719576719577, "percentage": 65.26, "elapsed_time": "1:15:11", "remaining_time": "0:40:02"}
76
- {"current_steps": 750, "total_steps": 1134, "loss": 0.7445, "lr": 5e-06, "epoch": 1.9841269841269842, "percentage": 66.14, "elapsed_time": "1:16:11", "remaining_time": "0:39:00"}
77
- {"current_steps": 756, "total_steps": 1134, "eval_loss": 0.7725165486335754, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "1:17:23", "remaining_time": "0:38:41"}
78
- {"current_steps": 760, "total_steps": 1134, "loss": 0.7322, "lr": 5e-06, "epoch": 2.0105820105820107, "percentage": 67.02, "elapsed_time": "1:19:21", "remaining_time": "0:39:03"}
79
- {"current_steps": 770, "total_steps": 1134, "loss": 0.7091, "lr": 5e-06, "epoch": 2.037037037037037, "percentage": 67.9, "elapsed_time": "1:20:20", "remaining_time": "0:37:58"}
80
- {"current_steps": 780, "total_steps": 1134, "loss": 0.711, "lr": 5e-06, "epoch": 2.0634920634920633, "percentage": 68.78, "elapsed_time": "1:21:19", "remaining_time": "0:36:54"}
81
- {"current_steps": 790, "total_steps": 1134, "loss": 0.7113, "lr": 5e-06, "epoch": 2.0899470899470898, "percentage": 69.66, "elapsed_time": "1:22:19", "remaining_time": "0:35:50"}
82
- {"current_steps": 800, "total_steps": 1134, "loss": 0.7109, "lr": 5e-06, "epoch": 2.1164021164021163, "percentage": 70.55, "elapsed_time": "1:23:18", "remaining_time": "0:34:46"}
83
- {"current_steps": 810, "total_steps": 1134, "loss": 0.7126, "lr": 5e-06, "epoch": 2.142857142857143, "percentage": 71.43, "elapsed_time": "1:24:17", "remaining_time": "0:33:42"}
84
- {"current_steps": 820, "total_steps": 1134, "loss": 0.7124, "lr": 5e-06, "epoch": 2.1693121693121693, "percentage": 72.31, "elapsed_time": "1:25:16", "remaining_time": "0:32:39"}
85
- {"current_steps": 830, "total_steps": 1134, "loss": 0.7123, "lr": 5e-06, "epoch": 2.195767195767196, "percentage": 73.19, "elapsed_time": "1:26:16", "remaining_time": "0:31:35"}
86
- {"current_steps": 840, "total_steps": 1134, "loss": 0.7123, "lr": 5e-06, "epoch": 2.2222222222222223, "percentage": 74.07, "elapsed_time": "1:27:15", "remaining_time": "0:30:32"}
87
- {"current_steps": 850, "total_steps": 1134, "loss": 0.7139, "lr": 5e-06, "epoch": 2.248677248677249, "percentage": 74.96, "elapsed_time": "1:28:14", "remaining_time": "0:29:28"}
88
- {"current_steps": 860, "total_steps": 1134, "loss": 0.7164, "lr": 5e-06, "epoch": 2.2751322751322753, "percentage": 75.84, "elapsed_time": "1:29:13", "remaining_time": "0:28:25"}
89
- {"current_steps": 870, "total_steps": 1134, "loss": 0.7141, "lr": 5e-06, "epoch": 2.3015873015873014, "percentage": 76.72, "elapsed_time": "1:30:13", "remaining_time": "0:27:22"}
90
- {"current_steps": 880, "total_steps": 1134, "loss": 0.7189, "lr": 5e-06, "epoch": 2.328042328042328, "percentage": 77.6, "elapsed_time": "1:31:12", "remaining_time": "0:26:19"}
91
- {"current_steps": 890, "total_steps": 1134, "loss": 0.7159, "lr": 5e-06, "epoch": 2.3544973544973544, "percentage": 78.48, "elapsed_time": "1:32:11", "remaining_time": "0:25:16"}
92
- {"current_steps": 900, "total_steps": 1134, "loss": 0.717, "lr": 5e-06, "epoch": 2.380952380952381, "percentage": 79.37, "elapsed_time": "1:33:11", "remaining_time": "0:24:13"}
93
- {"current_steps": 910, "total_steps": 1134, "loss": 0.7176, "lr": 5e-06, "epoch": 2.4074074074074074, "percentage": 80.25, "elapsed_time": "1:34:10", "remaining_time": "0:23:10"}
94
- {"current_steps": 920, "total_steps": 1134, "loss": 0.7141, "lr": 5e-06, "epoch": 2.433862433862434, "percentage": 81.13, "elapsed_time": "1:35:09", "remaining_time": "0:22:08"}
95
- {"current_steps": 930, "total_steps": 1134, "loss": 0.7147, "lr": 5e-06, "epoch": 2.4603174603174605, "percentage": 82.01, "elapsed_time": "1:36:08", "remaining_time": "0:21:05"}
96
- {"current_steps": 940, "total_steps": 1134, "loss": 0.7126, "lr": 5e-06, "epoch": 2.4867724867724865, "percentage": 82.89, "elapsed_time": "1:37:07", "remaining_time": "0:20:02"}
97
- {"current_steps": 950, "total_steps": 1134, "loss": 0.7171, "lr": 5e-06, "epoch": 2.5132275132275135, "percentage": 83.77, "elapsed_time": "1:38:07", "remaining_time": "0:19:00"}
98
- {"current_steps": 960, "total_steps": 1134, "loss": 0.7182, "lr": 5e-06, "epoch": 2.5396825396825395, "percentage": 84.66, "elapsed_time": "1:39:07", "remaining_time": "0:17:57"}
99
- {"current_steps": 970, "total_steps": 1134, "loss": 0.7187, "lr": 5e-06, "epoch": 2.566137566137566, "percentage": 85.54, "elapsed_time": "1:40:05", "remaining_time": "0:16:55"}
100
- {"current_steps": 980, "total_steps": 1134, "loss": 0.7164, "lr": 5e-06, "epoch": 2.5925925925925926, "percentage": 86.42, "elapsed_time": "1:41:05", "remaining_time": "0:15:53"}
101
- {"current_steps": 990, "total_steps": 1134, "loss": 0.7157, "lr": 5e-06, "epoch": 2.619047619047619, "percentage": 87.3, "elapsed_time": "1:42:05", "remaining_time": "0:14:50"}
102
- {"current_steps": 1000, "total_steps": 1134, "loss": 0.7175, "lr": 5e-06, "epoch": 2.6455026455026456, "percentage": 88.18, "elapsed_time": "1:43:04", "remaining_time": "0:13:48"}
103
- {"current_steps": 1010, "total_steps": 1134, "loss": 0.715, "lr": 5e-06, "epoch": 2.671957671957672, "percentage": 89.07, "elapsed_time": "1:44:03", "remaining_time": "0:12:46"}
104
- {"current_steps": 1020, "total_steps": 1134, "loss": 0.7208, "lr": 5e-06, "epoch": 2.6984126984126986, "percentage": 89.95, "elapsed_time": "1:45:02", "remaining_time": "0:11:44"}
105
- {"current_steps": 1030, "total_steps": 1134, "loss": 0.7157, "lr": 5e-06, "epoch": 2.7248677248677247, "percentage": 90.83, "elapsed_time": "1:46:02", "remaining_time": "0:10:42"}
106
- {"current_steps": 1040, "total_steps": 1134, "loss": 0.7119, "lr": 5e-06, "epoch": 2.751322751322751, "percentage": 91.71, "elapsed_time": "1:47:01", "remaining_time": "0:09:40"}
107
- {"current_steps": 1050, "total_steps": 1134, "loss": 0.7159, "lr": 5e-06, "epoch": 2.7777777777777777, "percentage": 92.59, "elapsed_time": "1:47:59", "remaining_time": "0:08:38"}
108
- {"current_steps": 1060, "total_steps": 1134, "loss": 0.7152, "lr": 5e-06, "epoch": 2.804232804232804, "percentage": 93.47, "elapsed_time": "1:48:59", "remaining_time": "0:07:36"}
109
- {"current_steps": 1070, "total_steps": 1134, "loss": 0.7171, "lr": 5e-06, "epoch": 2.8306878306878307, "percentage": 94.36, "elapsed_time": "1:49:58", "remaining_time": "0:06:34"}
110
- {"current_steps": 1080, "total_steps": 1134, "loss": 0.7154, "lr": 5e-06, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "1:50:57", "remaining_time": "0:05:32"}
111
- {"current_steps": 1090, "total_steps": 1134, "loss": 0.7152, "lr": 5e-06, "epoch": 2.8835978835978837, "percentage": 96.12, "elapsed_time": "1:51:57", "remaining_time": "0:04:31"}
112
- {"current_steps": 1100, "total_steps": 1134, "loss": 0.7181, "lr": 5e-06, "epoch": 2.91005291005291, "percentage": 97.0, "elapsed_time": "1:52:56", "remaining_time": "0:03:29"}
113
- {"current_steps": 1110, "total_steps": 1134, "loss": 0.7167, "lr": 5e-06, "epoch": 2.9365079365079367, "percentage": 97.88, "elapsed_time": "1:53:56", "remaining_time": "0:02:27"}
114
- {"current_steps": 1120, "total_steps": 1134, "loss": 0.7146, "lr": 5e-06, "epoch": 2.962962962962963, "percentage": 98.77, "elapsed_time": "1:54:55", "remaining_time": "0:01:26"}
115
- {"current_steps": 1130, "total_steps": 1134, "loss": 0.7153, "lr": 5e-06, "epoch": 2.9894179894179893, "percentage": 99.65, "elapsed_time": "1:55:54", "remaining_time": "0:00:24"}
116
- {"current_steps": 1134, "total_steps": 1134, "eval_loss": 0.7704827189445496, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:58:29", "remaining_time": "0:00:00"}
117
- {"current_steps": 1134, "total_steps": 1134, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:59:56", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 1134, "loss": 1.0155, "lr": 5e-06, "epoch": 0.026455026455026454, "percentage": 0.88, "elapsed_time": "0:00:59", "remaining_time": "1:51:38"}
2
+ {"current_steps": 20, "total_steps": 1134, "loss": 0.9187, "lr": 5e-06, "epoch": 0.05291005291005291, "percentage": 1.76, "elapsed_time": "0:01:57", "remaining_time": "1:49:22"}
3
+ {"current_steps": 30, "total_steps": 1134, "loss": 0.8889, "lr": 5e-06, "epoch": 0.07936507936507936, "percentage": 2.65, "elapsed_time": "0:02:56", "remaining_time": "1:47:58"}
4
+ {"current_steps": 40, "total_steps": 1134, "loss": 0.8728, "lr": 5e-06, "epoch": 0.10582010582010581, "percentage": 3.53, "elapsed_time": "0:03:54", "remaining_time": "1:46:50"}
5
+ {"current_steps": 50, "total_steps": 1134, "loss": 0.8553, "lr": 5e-06, "epoch": 0.13227513227513227, "percentage": 4.41, "elapsed_time": "0:04:52", "remaining_time": "1:45:44"}
6
+ {"current_steps": 60, "total_steps": 1134, "loss": 0.8472, "lr": 5e-06, "epoch": 0.15873015873015872, "percentage": 5.29, "elapsed_time": "0:05:50", "remaining_time": "1:44:41"}
7
+ {"current_steps": 70, "total_steps": 1134, "loss": 0.8386, "lr": 5e-06, "epoch": 0.18518518518518517, "percentage": 6.17, "elapsed_time": "0:06:49", "remaining_time": "1:43:45"}
8
+ {"current_steps": 80, "total_steps": 1134, "loss": 0.8303, "lr": 5e-06, "epoch": 0.21164021164021163, "percentage": 7.05, "elapsed_time": "0:07:47", "remaining_time": "1:42:45"}
9
+ {"current_steps": 90, "total_steps": 1134, "loss": 0.8296, "lr": 5e-06, "epoch": 0.23809523809523808, "percentage": 7.94, "elapsed_time": "0:08:46", "remaining_time": "1:41:44"}
10
+ {"current_steps": 100, "total_steps": 1134, "loss": 0.8208, "lr": 5e-06, "epoch": 0.26455026455026454, "percentage": 8.82, "elapsed_time": "0:09:44", "remaining_time": "1:40:45"}
11
+ {"current_steps": 110, "total_steps": 1134, "loss": 0.817, "lr": 5e-06, "epoch": 0.291005291005291, "percentage": 9.7, "elapsed_time": "0:10:43", "remaining_time": "1:39:47"}
12
+ {"current_steps": 120, "total_steps": 1134, "loss": 0.816, "lr": 5e-06, "epoch": 0.31746031746031744, "percentage": 10.58, "elapsed_time": "0:11:41", "remaining_time": "1:38:46"}
13
+ {"current_steps": 130, "total_steps": 1134, "loss": 0.8095, "lr": 5e-06, "epoch": 0.3439153439153439, "percentage": 11.46, "elapsed_time": "0:12:39", "remaining_time": "1:37:46"}
14
+ {"current_steps": 140, "total_steps": 1134, "loss": 0.809, "lr": 5e-06, "epoch": 0.37037037037037035, "percentage": 12.35, "elapsed_time": "0:13:38", "remaining_time": "1:36:49"}
15
+ {"current_steps": 150, "total_steps": 1134, "loss": 0.8116, "lr": 5e-06, "epoch": 0.3968253968253968, "percentage": 13.23, "elapsed_time": "0:14:36", "remaining_time": "1:35:49"}
16
+ {"current_steps": 160, "total_steps": 1134, "loss": 0.8043, "lr": 5e-06, "epoch": 0.42328042328042326, "percentage": 14.11, "elapsed_time": "0:15:34", "remaining_time": "1:34:49"}
17
+ {"current_steps": 170, "total_steps": 1134, "loss": 0.8081, "lr": 5e-06, "epoch": 0.4497354497354497, "percentage": 14.99, "elapsed_time": "0:16:33", "remaining_time": "1:33:55"}
18
+ {"current_steps": 180, "total_steps": 1134, "loss": 0.806, "lr": 5e-06, "epoch": 0.47619047619047616, "percentage": 15.87, "elapsed_time": "0:17:32", "remaining_time": "1:32:58"}
19
+ {"current_steps": 190, "total_steps": 1134, "loss": 0.8021, "lr": 5e-06, "epoch": 0.5026455026455027, "percentage": 16.75, "elapsed_time": "0:18:30", "remaining_time": "1:31:59"}
20
+ {"current_steps": 200, "total_steps": 1134, "loss": 0.7978, "lr": 5e-06, "epoch": 0.5291005291005291, "percentage": 17.64, "elapsed_time": "0:19:29", "remaining_time": "1:30:59"}
21
+ {"current_steps": 210, "total_steps": 1134, "loss": 0.7997, "lr": 5e-06, "epoch": 0.5555555555555556, "percentage": 18.52, "elapsed_time": "0:20:27", "remaining_time": "1:30:01"}
22
+ {"current_steps": 220, "total_steps": 1134, "loss": 0.7942, "lr": 5e-06, "epoch": 0.582010582010582, "percentage": 19.4, "elapsed_time": "0:21:26", "remaining_time": "1:29:03"}
23
+ {"current_steps": 230, "total_steps": 1134, "loss": 0.791, "lr": 5e-06, "epoch": 0.6084656084656085, "percentage": 20.28, "elapsed_time": "0:22:24", "remaining_time": "1:28:03"}
24
+ {"current_steps": 240, "total_steps": 1134, "loss": 0.7948, "lr": 5e-06, "epoch": 0.6349206349206349, "percentage": 21.16, "elapsed_time": "0:23:22", "remaining_time": "1:27:04"}
25
+ {"current_steps": 250, "total_steps": 1134, "loss": 0.7914, "lr": 5e-06, "epoch": 0.6613756613756614, "percentage": 22.05, "elapsed_time": "0:24:21", "remaining_time": "1:26:06"}
26
+ {"current_steps": 260, "total_steps": 1134, "loss": 0.7941, "lr": 5e-06, "epoch": 0.6878306878306878, "percentage": 22.93, "elapsed_time": "0:25:19", "remaining_time": "1:25:07"}
27
+ {"current_steps": 270, "total_steps": 1134, "loss": 0.7934, "lr": 5e-06, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "0:26:17", "remaining_time": "1:24:08"}
28
+ {"current_steps": 280, "total_steps": 1134, "loss": 0.7938, "lr": 5e-06, "epoch": 0.7407407407407407, "percentage": 24.69, "elapsed_time": "0:27:17", "remaining_time": "1:23:14"}
29
+ {"current_steps": 290, "total_steps": 1134, "loss": 0.7917, "lr": 5e-06, "epoch": 0.7671957671957672, "percentage": 25.57, "elapsed_time": "0:28:16", "remaining_time": "1:22:17"}
30
+ {"current_steps": 300, "total_steps": 1134, "loss": 0.7882, "lr": 5e-06, "epoch": 0.7936507936507936, "percentage": 26.46, "elapsed_time": "0:29:15", "remaining_time": "1:21:20"}
31
+ {"current_steps": 310, "total_steps": 1134, "loss": 0.7849, "lr": 5e-06, "epoch": 0.8201058201058201, "percentage": 27.34, "elapsed_time": "0:30:13", "remaining_time": "1:20:21"}
32
+ {"current_steps": 320, "total_steps": 1134, "loss": 0.7884, "lr": 5e-06, "epoch": 0.8465608465608465, "percentage": 28.22, "elapsed_time": "0:31:12", "remaining_time": "1:19:22"}
33
+ {"current_steps": 330, "total_steps": 1134, "loss": 0.7832, "lr": 5e-06, "epoch": 0.873015873015873, "percentage": 29.1, "elapsed_time": "0:32:10", "remaining_time": "1:18:23"}
34
+ {"current_steps": 340, "total_steps": 1134, "loss": 0.7889, "lr": 5e-06, "epoch": 0.8994708994708994, "percentage": 29.98, "elapsed_time": "0:33:09", "remaining_time": "1:17:25"}
35
+ {"current_steps": 350, "total_steps": 1134, "loss": 0.7915, "lr": 5e-06, "epoch": 0.9259259259259259, "percentage": 30.86, "elapsed_time": "0:34:07", "remaining_time": "1:16:26"}
36
+ {"current_steps": 360, "total_steps": 1134, "loss": 0.7863, "lr": 5e-06, "epoch": 0.9523809523809523, "percentage": 31.75, "elapsed_time": "0:35:05", "remaining_time": "1:15:27"}
37
+ {"current_steps": 370, "total_steps": 1134, "loss": 0.7836, "lr": 5e-06, "epoch": 0.9788359788359788, "percentage": 32.63, "elapsed_time": "0:36:04", "remaining_time": "1:14:30"}
38
+ {"current_steps": 378, "total_steps": 1134, "eval_loss": 0.7853822112083435, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:37:28", "remaining_time": "1:14:57"}
39
+ {"current_steps": 380, "total_steps": 1134, "loss": 0.7782, "lr": 5e-06, "epoch": 1.0052910052910053, "percentage": 33.51, "elapsed_time": "0:39:10", "remaining_time": "1:17:44"}
40
+ {"current_steps": 390, "total_steps": 1134, "loss": 0.752, "lr": 5e-06, "epoch": 1.0317460317460316, "percentage": 34.39, "elapsed_time": "0:40:09", "remaining_time": "1:16:35"}
41
+ {"current_steps": 400, "total_steps": 1134, "loss": 0.7504, "lr": 5e-06, "epoch": 1.0582010582010581, "percentage": 35.27, "elapsed_time": "0:41:07", "remaining_time": "1:15:27"}
42
+ {"current_steps": 410, "total_steps": 1134, "loss": 0.753, "lr": 5e-06, "epoch": 1.0846560846560847, "percentage": 36.16, "elapsed_time": "0:42:05", "remaining_time": "1:14:20"}
43
+ {"current_steps": 420, "total_steps": 1134, "loss": 0.7526, "lr": 5e-06, "epoch": 1.1111111111111112, "percentage": 37.04, "elapsed_time": "0:43:04", "remaining_time": "1:13:13"}
44
+ {"current_steps": 430, "total_steps": 1134, "loss": 0.7499, "lr": 5e-06, "epoch": 1.1375661375661377, "percentage": 37.92, "elapsed_time": "0:44:02", "remaining_time": "1:12:06"}
45
+ {"current_steps": 440, "total_steps": 1134, "loss": 0.7534, "lr": 5e-06, "epoch": 1.164021164021164, "percentage": 38.8, "elapsed_time": "0:45:01", "remaining_time": "1:11:00"}
46
+ {"current_steps": 450, "total_steps": 1134, "loss": 0.7516, "lr": 5e-06, "epoch": 1.1904761904761905, "percentage": 39.68, "elapsed_time": "0:45:59", "remaining_time": "1:09:54"}
47
+ {"current_steps": 460, "total_steps": 1134, "loss": 0.7525, "lr": 5e-06, "epoch": 1.216931216931217, "percentage": 40.56, "elapsed_time": "0:46:57", "remaining_time": "1:08:48"}
48
+ {"current_steps": 470, "total_steps": 1134, "loss": 0.7506, "lr": 5e-06, "epoch": 1.2433862433862433, "percentage": 41.45, "elapsed_time": "0:47:56", "remaining_time": "1:07:43"}
49
+ {"current_steps": 480, "total_steps": 1134, "loss": 0.7529, "lr": 5e-06, "epoch": 1.2698412698412698, "percentage": 42.33, "elapsed_time": "0:48:55", "remaining_time": "1:06:38"}
50
+ {"current_steps": 490, "total_steps": 1134, "loss": 0.7523, "lr": 5e-06, "epoch": 1.2962962962962963, "percentage": 43.21, "elapsed_time": "0:49:53", "remaining_time": "1:05:34"}
51
+ {"current_steps": 500, "total_steps": 1134, "loss": 0.7493, "lr": 5e-06, "epoch": 1.3227513227513228, "percentage": 44.09, "elapsed_time": "0:50:51", "remaining_time": "1:04:29"}
52
+ {"current_steps": 510, "total_steps": 1134, "loss": 0.7468, "lr": 5e-06, "epoch": 1.3492063492063493, "percentage": 44.97, "elapsed_time": "0:51:50", "remaining_time": "1:03:25"}
53
+ {"current_steps": 520, "total_steps": 1134, "loss": 0.7498, "lr": 5e-06, "epoch": 1.3756613756613756, "percentage": 45.86, "elapsed_time": "0:52:48", "remaining_time": "1:02:21"}
54
+ {"current_steps": 530, "total_steps": 1134, "loss": 0.7523, "lr": 5e-06, "epoch": 1.402116402116402, "percentage": 46.74, "elapsed_time": "0:53:46", "remaining_time": "1:01:17"}
55
+ {"current_steps": 540, "total_steps": 1134, "loss": 0.7491, "lr": 5e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "0:54:45", "remaining_time": "1:00:13"}
56
+ {"current_steps": 550, "total_steps": 1134, "loss": 0.7501, "lr": 5e-06, "epoch": 1.455026455026455, "percentage": 48.5, "elapsed_time": "0:55:43", "remaining_time": "0:59:10"}
57
+ {"current_steps": 560, "total_steps": 1134, "loss": 0.7523, "lr": 5e-06, "epoch": 1.4814814814814814, "percentage": 49.38, "elapsed_time": "0:56:41", "remaining_time": "0:58:06"}
58
+ {"current_steps": 570, "total_steps": 1134, "loss": 0.7506, "lr": 5e-06, "epoch": 1.507936507936508, "percentage": 50.26, "elapsed_time": "0:57:40", "remaining_time": "0:57:03"}
59
+ {"current_steps": 580, "total_steps": 1134, "loss": 0.7498, "lr": 5e-06, "epoch": 1.5343915343915344, "percentage": 51.15, "elapsed_time": "0:58:38", "remaining_time": "0:56:00"}
60
+ {"current_steps": 590, "total_steps": 1134, "loss": 0.749, "lr": 5e-06, "epoch": 1.560846560846561, "percentage": 52.03, "elapsed_time": "0:59:36", "remaining_time": "0:54:57"}
61
+ {"current_steps": 600, "total_steps": 1134, "loss": 0.7507, "lr": 5e-06, "epoch": 1.5873015873015874, "percentage": 52.91, "elapsed_time": "1:00:35", "remaining_time": "0:53:55"}
62
+ {"current_steps": 610, "total_steps": 1134, "loss": 0.751, "lr": 5e-06, "epoch": 1.6137566137566137, "percentage": 53.79, "elapsed_time": "1:01:33", "remaining_time": "0:52:53"}
63
+ {"current_steps": 620, "total_steps": 1134, "loss": 0.7452, "lr": 5e-06, "epoch": 1.6402116402116402, "percentage": 54.67, "elapsed_time": "1:02:32", "remaining_time": "0:51:50"}
64
+ {"current_steps": 630, "total_steps": 1134, "loss": 0.747, "lr": 5e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "1:03:30", "remaining_time": "0:50:48"}
65
+ {"current_steps": 640, "total_steps": 1134, "loss": 0.7478, "lr": 5e-06, "epoch": 1.693121693121693, "percentage": 56.44, "elapsed_time": "1:04:28", "remaining_time": "0:49:46"}
66
+ {"current_steps": 650, "total_steps": 1134, "loss": 0.7493, "lr": 5e-06, "epoch": 1.7195767195767195, "percentage": 57.32, "elapsed_time": "1:05:27", "remaining_time": "0:48:44"}
67
+ {"current_steps": 660, "total_steps": 1134, "loss": 0.7488, "lr": 5e-06, "epoch": 1.746031746031746, "percentage": 58.2, "elapsed_time": "1:06:25", "remaining_time": "0:47:42"}
68
+ {"current_steps": 670, "total_steps": 1134, "loss": 0.7484, "lr": 5e-06, "epoch": 1.7724867724867726, "percentage": 59.08, "elapsed_time": "1:07:24", "remaining_time": "0:46:40"}
69
+ {"current_steps": 680, "total_steps": 1134, "loss": 0.7425, "lr": 5e-06, "epoch": 1.798941798941799, "percentage": 59.96, "elapsed_time": "1:08:22", "remaining_time": "0:45:39"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4803d08a5fc210ba9b2bbf5779fb264f7415e660453648e1f7d317bc99c34458
3
  size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efcd1397e29d9e9e463cb3c83bc95dc1767e9d528cbac772697e88580f516784
3
  size 7288