sedrickkeh commited on
Commit
b1cc9ca
·
verified ·
1 Parent(s): b3688ed

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c1838964993927c52a3820a062f78dd84a3e37d8660449e14d105b1ad84c38a
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b72a5308b86ed626818fb9e9d57a83acdc469e49df9ba80958831afbba23ea08
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4c9dd3e2bfde50d44d7d2d791cc96e0facfc7a728c6e5f6719fc31d733ecb11
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c1b212f24d8ce46bb0c91bbba54ff0e121078c3d1dff498795a94964e105717
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec7f2f2230854a2006651ab37e6b5631180800c06133e3d4cf336ff739119081
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb4049d82f55d68063d3b2ece88ea0a8288b6e240649ae7d1ba236de28b3e3cc
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e87882c94d86a6d497dd5ac8146f8f5fe9e3a3005e9104d9f60da12ccf395d1
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5a514a556f9953f32d3ad0d238eb08998b6c48babfdeb9270d6de11a10937f3
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -74,3 +74,78 @@
74
  {"current_steps": 740, "total_steps": 2244, "loss": 0.8124, "lr": 5e-06, "epoch": 0.9891395154553049, "percentage": 32.98, "elapsed_time": "11:51:29", "remaining_time": "1 day, 0:06:02"}
75
  {"current_steps": 748, "total_steps": 2244, "eval_loss": 0.8087860345840454, "epoch": 0.9998329156223893, "percentage": 33.33, "elapsed_time": "12:12:30", "remaining_time": "1 day, 0:25:01"}
76
  {"current_steps": 750, "total_steps": 2244, "loss": 0.8763, "lr": 5e-06, "epoch": 1.0025062656641603, "percentage": 33.42, "elapsed_time": "12:15:26", "remaining_time": "1 day, 0:25:00"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  {"current_steps": 740, "total_steps": 2244, "loss": 0.8124, "lr": 5e-06, "epoch": 0.9891395154553049, "percentage": 32.98, "elapsed_time": "11:51:29", "remaining_time": "1 day, 0:06:02"}
75
  {"current_steps": 748, "total_steps": 2244, "eval_loss": 0.8087860345840454, "epoch": 0.9998329156223893, "percentage": 33.33, "elapsed_time": "12:12:30", "remaining_time": "1 day, 0:25:01"}
76
  {"current_steps": 750, "total_steps": 2244, "loss": 0.8763, "lr": 5e-06, "epoch": 1.0025062656641603, "percentage": 33.42, "elapsed_time": "12:15:26", "remaining_time": "1 day, 0:25:00"}
77
+ {"current_steps": 760, "total_steps": 2244, "loss": 0.7692, "lr": 5e-06, "epoch": 1.0158730158730158, "percentage": 33.87, "elapsed_time": "12:25:04", "remaining_time": "1 day, 0:14:50"}
78
+ {"current_steps": 770, "total_steps": 2244, "loss": 0.7719, "lr": 5e-06, "epoch": 1.0292397660818713, "percentage": 34.31, "elapsed_time": "12:34:41", "remaining_time": "1 day, 0:04:41"}
79
+ {"current_steps": 780, "total_steps": 2244, "loss": 0.7724, "lr": 5e-06, "epoch": 1.0426065162907268, "percentage": 34.76, "elapsed_time": "12:44:17", "remaining_time": "23:54:31"}
80
+ {"current_steps": 790, "total_steps": 2244, "loss": 0.7724, "lr": 5e-06, "epoch": 1.0559732664995822, "percentage": 35.2, "elapsed_time": "12:53:54", "remaining_time": "23:44:23"}
81
+ {"current_steps": 800, "total_steps": 2244, "loss": 0.7671, "lr": 5e-06, "epoch": 1.0693400167084377, "percentage": 35.65, "elapsed_time": "13:03:31", "remaining_time": "23:34:15"}
82
+ {"current_steps": 810, "total_steps": 2244, "loss": 0.7741, "lr": 5e-06, "epoch": 1.0827067669172932, "percentage": 36.1, "elapsed_time": "13:13:08", "remaining_time": "23:24:09"}
83
+ {"current_steps": 820, "total_steps": 2244, "loss": 0.7735, "lr": 5e-06, "epoch": 1.0960735171261486, "percentage": 36.54, "elapsed_time": "13:22:45", "remaining_time": "23:14:03"}
84
+ {"current_steps": 830, "total_steps": 2244, "loss": 0.7715, "lr": 5e-06, "epoch": 1.1094402673350041, "percentage": 36.99, "elapsed_time": "13:32:22", "remaining_time": "23:03:58"}
85
+ {"current_steps": 840, "total_steps": 2244, "loss": 0.7688, "lr": 5e-06, "epoch": 1.1228070175438596, "percentage": 37.43, "elapsed_time": "13:41:59", "remaining_time": "22:53:53"}
86
+ {"current_steps": 850, "total_steps": 2244, "loss": 0.7709, "lr": 5e-06, "epoch": 1.136173767752715, "percentage": 37.88, "elapsed_time": "13:51:36", "remaining_time": "22:43:49"}
87
+ {"current_steps": 860, "total_steps": 2244, "loss": 0.7756, "lr": 5e-06, "epoch": 1.1495405179615705, "percentage": 38.32, "elapsed_time": "14:01:13", "remaining_time": "22:33:46"}
88
+ {"current_steps": 870, "total_steps": 2244, "loss": 0.7762, "lr": 5e-06, "epoch": 1.162907268170426, "percentage": 38.77, "elapsed_time": "14:10:49", "remaining_time": "22:23:43"}
89
+ {"current_steps": 880, "total_steps": 2244, "loss": 0.7713, "lr": 5e-06, "epoch": 1.1762740183792815, "percentage": 39.22, "elapsed_time": "14:20:26", "remaining_time": "22:13:41"}
90
+ {"current_steps": 890, "total_steps": 2244, "loss": 0.7717, "lr": 5e-06, "epoch": 1.189640768588137, "percentage": 39.66, "elapsed_time": "14:30:04", "remaining_time": "22:03:40"}
91
+ {"current_steps": 900, "total_steps": 2244, "loss": 0.7714, "lr": 5e-06, "epoch": 1.2030075187969924, "percentage": 40.11, "elapsed_time": "14:39:41", "remaining_time": "21:53:40"}
92
+ {"current_steps": 910, "total_steps": 2244, "loss": 0.7727, "lr": 5e-06, "epoch": 1.2163742690058479, "percentage": 40.55, "elapsed_time": "14:49:18", "remaining_time": "21:43:39"}
93
+ {"current_steps": 920, "total_steps": 2244, "loss": 0.7715, "lr": 5e-06, "epoch": 1.2297410192147034, "percentage": 41.0, "elapsed_time": "14:58:55", "remaining_time": "21:33:39"}
94
+ {"current_steps": 930, "total_steps": 2244, "loss": 0.7682, "lr": 5e-06, "epoch": 1.2431077694235588, "percentage": 41.44, "elapsed_time": "15:08:31", "remaining_time": "21:23:40"}
95
+ {"current_steps": 940, "total_steps": 2244, "loss": 0.7753, "lr": 5e-06, "epoch": 1.2564745196324143, "percentage": 41.89, "elapsed_time": "15:18:08", "remaining_time": "21:13:41"}
96
+ {"current_steps": 950, "total_steps": 2244, "loss": 0.7726, "lr": 5e-06, "epoch": 1.2698412698412698, "percentage": 42.34, "elapsed_time": "15:27:45", "remaining_time": "21:03:42"}
97
+ {"current_steps": 960, "total_steps": 2244, "loss": 0.7721, "lr": 5e-06, "epoch": 1.2832080200501252, "percentage": 42.78, "elapsed_time": "15:37:22", "remaining_time": "20:53:44"}
98
+ {"current_steps": 970, "total_steps": 2244, "loss": 0.7702, "lr": 5e-06, "epoch": 1.2965747702589807, "percentage": 43.23, "elapsed_time": "15:46:59", "remaining_time": "20:43:46"}
99
+ {"current_steps": 980, "total_steps": 2244, "loss": 0.7755, "lr": 5e-06, "epoch": 1.3099415204678362, "percentage": 43.67, "elapsed_time": "15:56:35", "remaining_time": "20:33:48"}
100
+ {"current_steps": 990, "total_steps": 2244, "loss": 0.7742, "lr": 5e-06, "epoch": 1.3233082706766917, "percentage": 44.12, "elapsed_time": "16:06:12", "remaining_time": "20:23:51"}
101
+ {"current_steps": 1000, "total_steps": 2244, "loss": 0.7732, "lr": 5e-06, "epoch": 1.3366750208855471, "percentage": 44.56, "elapsed_time": "16:15:49", "remaining_time": "20:13:55"}
102
+ {"current_steps": 1010, "total_steps": 2244, "loss": 0.7663, "lr": 5e-06, "epoch": 1.3500417710944026, "percentage": 45.01, "elapsed_time": "16:25:25", "remaining_time": "20:03:59"}
103
+ {"current_steps": 1020, "total_steps": 2244, "loss": 0.7698, "lr": 5e-06, "epoch": 1.363408521303258, "percentage": 45.45, "elapsed_time": "16:35:02", "remaining_time": "19:54:03"}
104
+ {"current_steps": 1030, "total_steps": 2244, "loss": 0.7687, "lr": 5e-06, "epoch": 1.3767752715121135, "percentage": 45.9, "elapsed_time": "16:44:39", "remaining_time": "19:44:08"}
105
+ {"current_steps": 1040, "total_steps": 2244, "loss": 0.7707, "lr": 5e-06, "epoch": 1.390142021720969, "percentage": 46.35, "elapsed_time": "16:54:16", "remaining_time": "19:34:13"}
106
+ {"current_steps": 1050, "total_steps": 2244, "loss": 0.7698, "lr": 5e-06, "epoch": 1.4035087719298245, "percentage": 46.79, "elapsed_time": "17:03:52", "remaining_time": "19:24:17"}
107
+ {"current_steps": 1060, "total_steps": 2244, "loss": 0.7722, "lr": 5e-06, "epoch": 1.41687552213868, "percentage": 47.24, "elapsed_time": "17:13:28", "remaining_time": "19:14:21"}
108
+ {"current_steps": 1070, "total_steps": 2244, "loss": 0.7769, "lr": 5e-06, "epoch": 1.4302422723475354, "percentage": 47.68, "elapsed_time": "17:23:04", "remaining_time": "19:04:27"}
109
+ {"current_steps": 1080, "total_steps": 2244, "loss": 0.7709, "lr": 5e-06, "epoch": 1.443609022556391, "percentage": 48.13, "elapsed_time": "17:32:40", "remaining_time": "18:54:33"}
110
+ {"current_steps": 1090, "total_steps": 2244, "loss": 0.7652, "lr": 5e-06, "epoch": 1.4569757727652464, "percentage": 48.57, "elapsed_time": "17:42:18", "remaining_time": "18:44:40"}
111
+ {"current_steps": 1100, "total_steps": 2244, "loss": 0.7668, "lr": 5e-06, "epoch": 1.4703425229741018, "percentage": 49.02, "elapsed_time": "17:51:54", "remaining_time": "18:34:47"}
112
+ {"current_steps": 1110, "total_steps": 2244, "loss": 0.7748, "lr": 5e-06, "epoch": 1.4837092731829573, "percentage": 49.47, "elapsed_time": "18:01:31", "remaining_time": "18:24:54"}
113
+ {"current_steps": 1120, "total_steps": 2244, "loss": 0.7716, "lr": 5e-06, "epoch": 1.4970760233918128, "percentage": 49.91, "elapsed_time": "18:11:08", "remaining_time": "18:15:01"}
114
+ {"current_steps": 1130, "total_steps": 2244, "loss": 0.7693, "lr": 5e-06, "epoch": 1.5104427736006683, "percentage": 50.36, "elapsed_time": "18:20:44", "remaining_time": "18:05:09"}
115
+ {"current_steps": 1140, "total_steps": 2244, "loss": 0.7663, "lr": 5e-06, "epoch": 1.5238095238095237, "percentage": 50.8, "elapsed_time": "18:30:20", "remaining_time": "17:55:16"}
116
+ {"current_steps": 1150, "total_steps": 2244, "loss": 0.7707, "lr": 5e-06, "epoch": 1.5371762740183792, "percentage": 51.25, "elapsed_time": "18:39:56", "remaining_time": "17:45:24"}
117
+ {"current_steps": 1160, "total_steps": 2244, "loss": 0.7708, "lr": 5e-06, "epoch": 1.5505430242272347, "percentage": 51.69, "elapsed_time": "18:49:33", "remaining_time": "17:35:32"}
118
+ {"current_steps": 1170, "total_steps": 2244, "loss": 0.769, "lr": 5e-06, "epoch": 1.5639097744360901, "percentage": 52.14, "elapsed_time": "18:59:09", "remaining_time": "17:25:41"}
119
+ {"current_steps": 1180, "total_steps": 2244, "loss": 0.7722, "lr": 5e-06, "epoch": 1.5772765246449456, "percentage": 52.58, "elapsed_time": "19:08:45", "remaining_time": "17:15:49"}
120
+ {"current_steps": 1190, "total_steps": 2244, "loss": 0.7709, "lr": 5e-06, "epoch": 1.590643274853801, "percentage": 53.03, "elapsed_time": "19:18:22", "remaining_time": "17:05:59"}
121
+ {"current_steps": 1200, "total_steps": 2244, "loss": 0.7684, "lr": 5e-06, "epoch": 1.6040100250626566, "percentage": 53.48, "elapsed_time": "19:27:59", "remaining_time": "16:56:09"}
122
+ {"current_steps": 1210, "total_steps": 2244, "loss": 0.773, "lr": 5e-06, "epoch": 1.617376775271512, "percentage": 53.92, "elapsed_time": "19:37:37", "remaining_time": "16:46:19"}
123
+ {"current_steps": 1220, "total_steps": 2244, "loss": 0.7703, "lr": 5e-06, "epoch": 1.6307435254803675, "percentage": 54.37, "elapsed_time": "19:47:13", "remaining_time": "16:36:29"}
124
+ {"current_steps": 1230, "total_steps": 2244, "loss": 0.7684, "lr": 5e-06, "epoch": 1.644110275689223, "percentage": 54.81, "elapsed_time": "19:56:50", "remaining_time": "16:26:39"}
125
+ {"current_steps": 1240, "total_steps": 2244, "loss": 0.7648, "lr": 5e-06, "epoch": 1.6574770258980784, "percentage": 55.26, "elapsed_time": "20:06:27", "remaining_time": "16:16:50"}
126
+ {"current_steps": 1250, "total_steps": 2244, "loss": 0.7663, "lr": 5e-06, "epoch": 1.670843776106934, "percentage": 55.7, "elapsed_time": "20:16:04", "remaining_time": "16:07:01"}
127
+ {"current_steps": 1260, "total_steps": 2244, "loss": 0.7687, "lr": 5e-06, "epoch": 1.6842105263157894, "percentage": 56.15, "elapsed_time": "20:25:40", "remaining_time": "15:57:11"}
128
+ {"current_steps": 1270, "total_steps": 2244, "loss": 0.7677, "lr": 5e-06, "epoch": 1.6975772765246449, "percentage": 56.6, "elapsed_time": "20:35:17", "remaining_time": "15:47:22"}
129
+ {"current_steps": 1280, "total_steps": 2244, "loss": 0.7705, "lr": 5e-06, "epoch": 1.7109440267335003, "percentage": 57.04, "elapsed_time": "20:44:54", "remaining_time": "15:37:33"}
130
+ {"current_steps": 1290, "total_steps": 2244, "loss": 0.7664, "lr": 5e-06, "epoch": 1.7243107769423558, "percentage": 57.49, "elapsed_time": "20:54:31", "remaining_time": "15:27:45"}
131
+ {"current_steps": 1300, "total_steps": 2244, "loss": 0.7643, "lr": 5e-06, "epoch": 1.7376775271512113, "percentage": 57.93, "elapsed_time": "21:04:07", "remaining_time": "15:17:57"}
132
+ {"current_steps": 1310, "total_steps": 2244, "loss": 0.767, "lr": 5e-06, "epoch": 1.7510442773600667, "percentage": 58.38, "elapsed_time": "21:13:44", "remaining_time": "15:08:08"}
133
+ {"current_steps": 1320, "total_steps": 2244, "loss": 0.7701, "lr": 5e-06, "epoch": 1.7644110275689222, "percentage": 58.82, "elapsed_time": "21:23:21", "remaining_time": "14:58:21"}
134
+ {"current_steps": 1330, "total_steps": 2244, "loss": 0.7725, "lr": 5e-06, "epoch": 1.7777777777777777, "percentage": 59.27, "elapsed_time": "21:32:59", "remaining_time": "14:48:33"}
135
+ {"current_steps": 1340, "total_steps": 2244, "loss": 0.768, "lr": 5e-06, "epoch": 1.7911445279866332, "percentage": 59.71, "elapsed_time": "21:42:35", "remaining_time": "14:38:46"}
136
+ {"current_steps": 1350, "total_steps": 2244, "loss": 0.7685, "lr": 5e-06, "epoch": 1.8045112781954886, "percentage": 60.16, "elapsed_time": "21:52:13", "remaining_time": "14:28:59"}
137
+ {"current_steps": 1360, "total_steps": 2244, "loss": 0.7685, "lr": 5e-06, "epoch": 1.817878028404344, "percentage": 60.61, "elapsed_time": "22:01:51", "remaining_time": "14:19:12"}
138
+ {"current_steps": 1370, "total_steps": 2244, "loss": 0.7668, "lr": 5e-06, "epoch": 1.8312447786131996, "percentage": 61.05, "elapsed_time": "22:11:28", "remaining_time": "14:09:25"}
139
+ {"current_steps": 1380, "total_steps": 2244, "loss": 0.7685, "lr": 5e-06, "epoch": 1.844611528822055, "percentage": 61.5, "elapsed_time": "22:21:06", "remaining_time": "13:59:38"}
140
+ {"current_steps": 1390, "total_steps": 2244, "loss": 0.766, "lr": 5e-06, "epoch": 1.8579782790309105, "percentage": 61.94, "elapsed_time": "22:30:44", "remaining_time": "13:49:52"}
141
+ {"current_steps": 1400, "total_steps": 2244, "loss": 0.7675, "lr": 5e-06, "epoch": 1.871345029239766, "percentage": 62.39, "elapsed_time": "22:40:21", "remaining_time": "13:40:06"}
142
+ {"current_steps": 1410, "total_steps": 2244, "loss": 0.7651, "lr": 5e-06, "epoch": 1.8847117794486214, "percentage": 62.83, "elapsed_time": "22:49:58", "remaining_time": "13:30:19"}
143
+ {"current_steps": 1420, "total_steps": 2244, "loss": 0.7683, "lr": 5e-06, "epoch": 1.898078529657477, "percentage": 63.28, "elapsed_time": "22:59:35", "remaining_time": "13:20:32"}
144
+ {"current_steps": 1430, "total_steps": 2244, "loss": 0.7651, "lr": 5e-06, "epoch": 1.9114452798663324, "percentage": 63.73, "elapsed_time": "23:09:12", "remaining_time": "13:10:46"}
145
+ {"current_steps": 1440, "total_steps": 2244, "loss": 0.7664, "lr": 5e-06, "epoch": 1.9248120300751879, "percentage": 64.17, "elapsed_time": "23:18:49", "remaining_time": "13:01:00"}
146
+ {"current_steps": 1450, "total_steps": 2244, "loss": 0.7667, "lr": 5e-06, "epoch": 1.9381787802840433, "percentage": 64.62, "elapsed_time": "23:28:26", "remaining_time": "12:51:14"}
147
+ {"current_steps": 1460, "total_steps": 2244, "loss": 0.7649, "lr": 5e-06, "epoch": 1.9515455304928988, "percentage": 65.06, "elapsed_time": "23:38:04", "remaining_time": "12:41:29"}
148
+ {"current_steps": 1470, "total_steps": 2244, "loss": 0.7653, "lr": 5e-06, "epoch": 1.9649122807017543, "percentage": 65.51, "elapsed_time": "23:47:42", "remaining_time": "12:31:43"}
149
+ {"current_steps": 1480, "total_steps": 2244, "loss": 0.7665, "lr": 5e-06, "epoch": 1.9782790309106097, "percentage": 65.95, "elapsed_time": "23:57:19", "remaining_time": "12:21:58"}
150
+ {"current_steps": 1490, "total_steps": 2244, "loss": 0.7674, "lr": 5e-06, "epoch": 1.9916457811194652, "percentage": 66.4, "elapsed_time": "1 day, 0:06:57", "remaining_time": "12:12:13"}
151
+ {"current_steps": 1496, "total_steps": 2244, "eval_loss": 0.7951143383979797, "epoch": 1.9996658312447786, "percentage": 66.67, "elapsed_time": "1 day, 0:26:11", "remaining_time": "12:13:05"}