riczhou commited on
Commit
c832d6f
·
verified ·
1 Parent(s): 57271fd

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
logs.txt ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/171 [00:00<?, ?it/s]
1
 
 
2
  0%| | 0/171 [00:00<?, ?it/s]
3
 
 
4
  0%| | 0/171 [00:05<?, ?it/s]
5
  1%| | 1/171 [00:07<20:15, 7.15s/it]
6
 
 
7
  1%| | 1/171 [00:09<20:15, 7.15s/it]
8
  1%| | 2/171 [00:10<14:39, 5.20s/it]
9
 
 
10
  1%| | 2/171 [00:10<14:39, 5.20s/it]
11
 
 
12
  1%| | 2/171 [00:11<14:39, 5.20s/it]
13
  2%|▏ | 4/171 [00:11<05:29, 1.98s/it]
14
 
 
15
  2%|▏ | 4/171 [00:11<05:29, 1.98s/it]
16
  3%|▎ | 5/171 [00:11<03:57, 1.43s/it]
17
 
 
18
  3%|▎ | 5/171 [00:11<03:57, 1.43s/it]
19
 
 
20
  3%|▎ | 5/171 [00:11<03:57, 1.43s/it]
21
 
 
22
  3%|▎ | 5/171 [00:11<03:57, 1.43s/it]
23
  5%|▍ | 8/171 [00:11<01:44, 1.56it/s]
24
 
 
25
  5%|▍ | 8/171 [00:11<01:44, 1.56it/s]
26
 
 
27
  5%|▍ | 8/171 [00:11<01:44, 1.56it/s]
28
 
 
29
  5%|▍ | 8/171 [00:11<01:44, 1.56it/s]
30
  6%|▋ | 11/171 [00:11<00:58, 2.71it/s]
31
 
 
32
  6%|▋ | 11/171 [00:11<00:58, 2.71it/s]
33
 
 
34
  6%|▋ | 11/171 [00:11<00:58, 2.71it/s]
35
  8%|▊ | 13/171 [00:11<00:45, 3.44it/s]
36
 
 
37
  8%|▊ | 13/171 [00:11<00:45, 3.44it/s]
38
 
 
39
  8%|▊ | 13/171 [00:11<00:45, 3.44it/s]
40
  9%|▉ | 15/171 [00:11<00:34, 4.54it/s]
41
 
 
42
  9%|▉ | 15/171 [00:11<00:34, 4.54it/s]
43
 
 
44
  9%|▉ | 15/171 [00:11<00:34, 4.54it/s]
45
 
 
46
  9%|▉ | 15/171 [00:11<00:34, 4.54it/s]
47
  11%|█ | 18/171 [00:12<00:23, 6.57it/s]
48
 
 
49
  11%|█ | 18/171 [00:12<00:23, 6.57it/s]
50
 
 
51
  11%|█ | 18/171 [00:12<00:23, 6.57it/s]
52
  12%|█▏ | 20/171 [00:12<00:21, 7.11it/s]
53
 
 
54
  12%|█▏ | 20/171 [00:12<00:21, 7.11it/s]
55
 
 
56
  12%|█▏ | 20/171 [00:12<00:21, 7.11it/s]
57
  13%|█▎ | 22/171 [00:12<00:17, 8.63it/s]
58
 
 
59
  13%|█▎ | 22/171 [00:12<00:17, 8.63it/s]
60
 
 
61
  13%|█▎ | 22/171 [00:12<00:17, 8.63it/s]
62
 
 
63
  13%|█▎ | 22/171 [00:12<00:17, 8.63it/s]
64
  15%|█▍ | 25/171 [00:12<00:13, 11.19it/s]
65
 
 
66
  15%|█▍ | 25/171 [00:12<00:13, 11.19it/s]
67
 
 
68
  15%|█▍ | 25/171 [00:12<00:13, 11.19it/s]
69
  16%|█▌ | 27/171 [00:12<00:13, 10.56it/s]
70
 
 
71
  16%|█▌ | 27/171 [00:12<00:13, 10.56it/s]
72
 
 
73
  16%|█▌ | 27/171 [00:12<00:13, 10.56it/s]
74
  17%|█▋ | 29/171 [00:12<00:11, 12.06it/s]
75
 
 
76
  17%|█▋ | 29/171 [00:12<00:11, 12.06it/s]
77
 
 
78
  17%|█▋ | 29/171 [00:12<00:11, 12.06it/s]
79
 
 
80
  17%|█▋ | 29/171 [00:12<00:11, 12.06it/s]
81
  19%|█▊ | 32/171 [00:12<00:09, 14.52it/s]
82
 
 
83
  19%|█▊ | 32/171 [00:13<00:09, 14.52it/s]
84
 
 
85
  19%|█▊ | 32/171 [00:13<00:09, 14.52it/s]
86
  20%|█▉ | 34/171 [00:13<00:10, 12.58it/s]
87
 
 
88
  20%|█▉ | 34/171 [00:13<00:10, 12.58it/s]
89
 
 
90
  20%|█▉ | 34/171 [00:13<00:10, 12.58it/s]
91
  21%|██ | 36/171 [00:13<00:09, 13.89it/s]
92
 
 
93
  21%|██ | 36/171 [00:13<00:09, 13.89it/s]
94
 
 
95
  21%|██ | 36/171 [00:13<00:09, 13.89it/s]
96
 
 
97
  21%|██ | 36/171 [00:13<00:09, 13.89it/s]
98
  23%|██▎ | 39/171 [00:13<00:08, 16.13it/s]
99
 
 
100
  23%|██▎ | 39/171 [00:13<00:08, 16.13it/s]
101
 
 
102
  23%|██▎ | 39/171 [00:13<00:08, 16.13it/s]
103
  24%|██▍ | 41/171 [00:13<00:09, 13.54it/s]
104
 
 
105
  24%|██▍ | 41/171 [00:13<00:09, 13.54it/s]
106
 
 
107
  24%|██▍ | 41/171 [00:13<00:09, 13.54it/s]
108
  25%|██▌ | 43/171 [00:13<00:08, 14.73it/s]
109
 
 
110
  25%|██▌ | 43/171 [00:13<00:08, 14.73it/s]
111
 
 
112
  25%|██▌ | 43/171 [00:13<00:08, 14.73it/s]
113
 
 
114
  25%|██▌ | 43/171 [00:13<00:08, 14.73it/s]
115
  27%|██▋ | 46/171 [00:13<00:07, 16.83it/s]
116
 
 
117
  27%|██▋ | 46/171 [00:13<00:07, 16.83it/s]
118
 
 
119
  27%|██▋ | 46/171 [00:14<00:07, 16.83it/s]
120
  28%|██▊ | 48/171 [00:14<00:08, 14.08it/s]
121
 
 
122
  28%|██▊ | 48/171 [00:14<00:08, 14.08it/s]
123
 
 
124
  28%|██▊ | 48/171 [00:14<00:08, 14.08it/s]
125
  29%|██▉ | 50/171 [00:14<00:07, 15.24it/s]
126
 
 
127
  29%|██▉ | 50/171 [00:14<00:07, 15.24it/s]
128
 
 
129
  29%|██▉ | 50/171 [00:14<00:07, 15.24it/s]
130
 
 
131
  29%|██▉ | 50/171 [00:14<00:07, 15.24it/s]
132
  31%|███ | 53/171 [00:14<00:06, 17.29it/s]
133
 
 
134
  31%|███ | 53/171 [00:14<00:06, 17.29it/s]
135
 
 
136
  31%|███ | 53/171 [00:14<00:06, 17.29it/s]
137
  32%|███▏ | 55/171 [00:14<00:08, 14.41it/s]
138
 
 
139
  32%|███▏ | 55/171 [00:14<00:08, 14.41it/s]
140
 
 
141
  32%|███▏ | 55/171 [00:14<00:08, 14.41it/s]
142
  33%|███▎ | 57/171 [00:14<00:07, 15.56it/s]
143
 
 
144
  33%|███▎ | 57/171 [00:14<00:07, 15.56it/s]
145
 
 
146
  33%|███▎ | 57/171 [00:14<00:07, 15.56it/s]
147
 
 
148
  33%|███▎ | 57/171 [00:14<00:07, 15.56it/s]
149
  35%|███▌ | 60/171 [00:14<00:06, 17.64it/s]
150
 
 
151
  35%|███▌ | 60/171 [00:14<00:06, 17.64it/s]
152
 
 
153
  35%|███▌ | 60/171 [00:14<00:06, 17.64it/s]
154
  36%|███▋ | 62/171 [00:14<00:07, 14.59it/s]
155
 
 
156
  36%|███▋ | 62/171 [00:14<00:07, 14.59it/s]
157
 
 
158
  36%|███▋ | 62/171 [00:14<00:07, 14.59it/s]
159
  37%|███▋ | 64/171 [00:15<00:06, 15.72it/s]
160
 
 
161
  37%|███▋ | 64/171 [00:15<00:06, 15.72it/s]
162
 
 
163
  37%|███▋ | 64/171 [00:15<00:06, 15.72it/s]
164
 
 
165
  37%|███▋ | 64/171 [00:15<00:06, 15.72it/s]
166
  39%|███▉ | 67/171 [00:15<00:05, 17.79it/s]
167
 
 
168
  39%|███▉ | 67/171 [00:15<00:05, 17.79it/s]
169
 
 
170
  39%|███▉ | 67/171 [00:15<00:05, 17.79it/s]
171
  40%|████ | 69/171 [00:15<00:06, 14.67it/s]
172
 
 
173
  40%|████ | 69/171 [00:15<00:06, 14.67it/s]
174
 
 
175
  40%|████ | 69/171 [00:15<00:06, 14.67it/s]
176
  42%|████▏ | 71/171 [00:15<00:06, 15.80it/s]
177
 
 
178
  42%|████▏ | 71/171 [00:15<00:06, 15.80it/s]
179
 
 
180
  42%|████▏ | 71/171 [00:15<00:06, 15.80it/s]
181
 
 
182
  42%|████▏ | 71/171 [00:15<00:06, 15.80it/s]
183
  43%|████▎ | 74/171 [00:15<00:05, 17.85it/s]
184
 
 
185
  43%|████▎ | 74/171 [00:15<00:05, 17.85it/s]
186
 
 
187
  43%|████▎ | 74/171 [00:15<00:05, 17.85it/s]
188
  44%|████▍ | 76/171 [00:15<00:06, 14.73it/s]
189
 
 
190
  44%|████▍ | 76/171 [00:15<00:06, 14.73it/s]
191
 
 
192
  44%|████▍ | 76/171 [00:15<00:06, 14.73it/s]
193
  46%|████▌ | 78/171 [00:15<00:05, 15.82it/s]
194
 
 
195
  46%|████▌ | 78/171 [00:15<00:05, 15.82it/s]
196
 
 
197
  46%|████▌ | 78/171 [00:15<00:05, 15.82it/s]
198
 
 
199
  46%|████▌ | 78/171 [00:16<00:05, 15.82it/s]
200
  47%|████▋ | 81/171 [00:16<00:05, 17.86it/s]
201
 
 
202
  47%|████▋ | 81/171 [00:16<00:05, 17.86it/s]
203
 
 
204
  47%|████▋ | 81/171 [00:16<00:05, 17.86it/s]
205
  49%|████▊ | 83/171 [00:16<00:05, 14.72it/s]
206
 
 
207
  49%|████▊ | 83/171 [00:16<00:05, 14.72it/s]
208
 
 
209
  49%|████▊ | 83/171 [00:16<00:05, 14.72it/s]
210
  50%|████▉ | 85/171 [00:16<00:05, 15.83it/s]
211
 
 
212
  50%|████▉ | 85/171 [00:16<00:05, 15.83it/s]
213
 
 
214
  50%|████▉ | 85/171 [00:16<00:05, 15.83it/s]
215
 
 
216
  50%|████▉ | 85/171 [00:16<00:05, 15.83it/s]
217
  51%|█████▏ | 88/171 [00:16<00:04, 17.89it/s]
218
 
 
219
  51%|█████▏ | 88/171 [00:16<00:04, 17.89it/s]
220
 
 
221
  51%|█████▏ | 88/171 [00:16<00:04, 17.89it/s]
222
  53%|█████▎ | 90/171 [00:16<00:05, 14.73it/s]
223
 
 
224
  53%|█████▎ | 90/171 [00:16<00:05, 14.73it/s]
225
 
 
226
  53%|█████▎ | 90/171 [00:16<00:05, 14.73it/s]
227
  54%|█████▍ | 92/171 [00:16<00:05, 15.76it/s]
228
 
 
229
  54%|█████▍ | 92/171 [00:16<00:05, 15.76it/s]
230
 
 
231
  54%|█████▍ | 92/171 [00:16<00:05, 15.76it/s]
232
 
 
233
  54%|█████▍ | 92/171 [00:16<00:05, 15.76it/s]
234
  56%|█████▌ | 95/171 [00:16<00:04, 17.86it/s]
235
 
 
236
  56%|█████▌ | 95/171 [00:16<00:04, 17.86it/s]
237
 
 
238
  56%|█████▌ | 95/171 [00:17<00:04, 17.86it/s]
239
  57%|█████▋ | 97/171 [00:17<00:05, 14.78it/s]
240
 
 
241
  57%|█████▋ | 97/171 [00:17<00:05, 14.78it/s]
242
 
 
243
  57%|█████▋ | 97/171 [00:17<00:05, 14.78it/s]
244
  58%|█████▊ | 99/171 [00:17<00:04, 15.87it/s]
245
 
 
246
  58%|█████▊ | 99/171 [00:17<00:04, 15.87it/s]
247
 
 
248
  58%|█████▊ | 99/171 [00:17<00:04, 15.87it/s]
249
 
 
250
  58%|█████▊ | 99/171 [00:17<00:04, 15.87it/s]
251
  60%|█████▉ | 102/171 [00:17<00:03, 17.97it/s]
252
 
 
253
  60%|█████▉ | 102/171 [00:17<00:03, 17.97it/s]
254
 
 
255
  60%|█████▉ | 102/171 [00:17<00:03, 17.97it/s]
256
  61%|██████ | 104/171 [00:17<00:04, 14.77it/s]
257
 
 
258
  61%|██████ | 104/171 [00:17<00:04, 14.77it/s]
259
 
 
260
  61%|██████ | 104/171 [00:17<00:04, 14.77it/s]
261
  62%|██████▏ | 106/171 [00:17<00:04, 15.85it/s]
262
 
 
263
  62%|██████▏ | 106/171 [00:17<00:04, 15.85it/s]
264
 
 
265
  62%|██████▏ | 106/171 [00:17<00:04, 15.85it/s]
266
 
 
267
  62%|██████▏ | 106/171 [00:17<00:04, 15.85it/s]
268
  64%|██████▎ | 109/171 [00:17<00:03, 17.50it/s]
269
 
 
270
  64%|██████▎ | 109/171 [00:18<00:03, 17.50it/s]
271
 
 
272
  64%|██████▎ | 109/171 [00:18<00:03, 17.50it/s]
273
  65%|██████▍ | 111/171 [00:18<00:05, 11.52it/s]
274
 
 
275
  65%|██████▍ | 111/171 [00:18<00:05, 11.52it/s]
276
 
 
277
  65%|██████▍ | 111/171 [00:18<00:05, 11.52it/s]
278
  66%|██████▌ | 113/171 [00:18<00:04, 12.95it/s]
279
 
 
280
  66%|██████▌ | 113/171 [00:18<00:04, 12.95it/s]
281
 
 
282
  66%|██████▌ | 113/171 [00:18<00:04, 12.95it/s]
283
 
 
284
  66%|██████▌ | 113/171 [00:18<00:04, 12.95it/s]
285
  68%|██████▊ | 116/171 [00:18<00:03, 15.50it/s]
286
 
 
287
  68%|██████▊ | 116/171 [00:18<00:03, 15.50it/s]
288
 
 
289
  68%|██████▊ | 116/171 [00:18<00:03, 15.50it/s]
290
  69%|██████▉ | 118/171 [00:18<00:04, 11.18it/s]
291
 
 
292
  69%|██████▉ | 118/171 [00:18<00:04, 11.18it/s]
293
 
 
294
  69%|██████▉ | 118/171 [00:18<00:04, 11.18it/s]
295
  70%|███████ | 120/171 [00:18<00:04, 12.65it/s]
296
 
 
297
  70%|███████ | 120/171 [00:18<00:04, 12.65it/s]
298
 
 
299
  70%|███████ | 120/171 [00:18<00:04, 12.65it/s]
300
 
 
301
  70%|███████ | 120/171 [00:18<00:04, 12.65it/s]
302
  72%|███████▏ | 123/171 [00:18<00:03, 15.18it/s]
303
 
 
304
  72%|███████▏ | 123/171 [00:19<00:03, 15.18it/s]
305
 
 
306
  72%|███████▏ | 123/171 [00:19<00:03, 15.18it/s]
307
  73%|███████▎ | 125/171 [00:19<00:04, 11.25it/s]
308
 
 
309
  73%|███████▎ | 125/171 [00:19<00:04, 11.25it/s]
310
 
 
311
  73%|███████▎ | 125/171 [00:19<00:04, 11.25it/s]
312
  74%|███████▍ | 127/171 [00:19<00:03, 12.38it/s]
313
 
 
314
  74%|███████▍ | 127/171 [00:19<00:03, 12.38it/s]
315
 
 
316
  74%|███████▍ | 127/171 [00:19<00:03, 12.38it/s]
317
 
 
318
  74%|███████▍ | 127/171 [00:19<00:03, 12.38it/s]
319
  76%|███████▌ | 130/171 [00:19<00:02, 14.83it/s]
320
 
 
321
  76%|███████▌ | 130/171 [00:19<00:02, 14.83it/s]
322
 
 
323
  76%|███████▌ | 130/171 [00:19<00:02, 14.83it/s]
324
  77%|███████▋ | 132/171 [00:19<00:04, 9.01it/s]
325
 
 
326
  77%|███████▋ | 132/171 [00:19<00:04, 9.01it/s]
327
 
 
328
  77%|███████▋ | 132/171 [00:19<00:04, 9.01it/s]
329
  78%|███████▊ | 134/171 [00:20<00:03, 10.52it/s]
330
 
 
331
  78%|███████▊ | 134/171 [00:20<00:03, 10.52it/s]
332
 
 
333
  78%|███████▊ | 134/171 [00:20<00:03, 10.52it/s]
334
 
 
335
  78%|███████▊ | 134/171 [00:20<00:03, 10.52it/s]
336
  80%|████████ | 137/171 [00:20<00:02, 13.07it/s]
337
 
 
338
  80%|████████ | 137/171 [00:20<00:02, 13.07it/s]
339
 
 
340
  80%|████████ | 137/171 [00:20<00:02, 13.07it/s]
341
  81%|████████▏ | 139/171 [00:20<00:03, 10.27it/s]
342
 
 
343
  81%|████████▏ | 139/171 [00:20<00:03, 10.27it/s]
344
 
 
345
  81%|████████▏ | 139/171 [00:20<00:03, 10.27it/s]
346
  82%|████████▏ | 141/171 [00:20<00:02, 11.76it/s]
347
 
 
348
  82%|████████▏ | 141/171 [00:20<00:02, 11.76it/s]
349
 
 
350
  82%|████████▏ | 141/171 [00:20<00:02, 11.76it/s]
351
 
 
352
  82%|████████▏ | 141/171 [00:20<00:02, 11.76it/s]
353
  84%|████████▍ | 144/171 [00:20<00:01, 14.27it/s]
354
 
 
355
  84%|████████▍ | 144/171 [00:20<00:01, 14.27it/s]
356
 
 
357
  84%|████████▍ | 144/171 [00:20<00:01, 14.27it/s]
358
  85%|████████▌ | 146/171 [00:20<00:02, 11.77it/s]
359
 
 
360
  85%|████████▌ | 146/171 [00:20<00:02, 11.77it/s]
361
 
 
362
  85%|████████▌ | 146/171 [00:21<00:02, 11.77it/s]
363
  87%|████████▋ | 148/171 [00:21<00:01, 13.04it/s]
364
 
 
365
  87%|████████▋ | 148/171 [00:21<00:01, 13.04it/s]
366
 
 
367
  87%|████████▋ | 148/171 [00:21<00:01, 13.04it/s]
368
 
 
369
  87%|████████▋ | 148/171 [00:21<00:01, 13.04it/s]
370
  88%|████████▊ | 151/171 [00:21<00:01, 15.45it/s]
371
 
 
372
  88%|████████▊ | 151/171 [00:21<00:01, 15.45it/s]
373
 
 
374
  88%|████████▊ | 151/171 [00:21<00:01, 15.45it/s]
375
  89%|████████▉ | 153/171 [00:21<00:01, 11.67it/s]
376
 
 
377
  89%|████████▉ | 153/171 [00:21<00:01, 11.67it/s]
378
 
 
379
  89%|████████▉ | 153/171 [00:21<00:01, 11.67it/s]
380
  91%|█████████ | 155/171 [00:21<00:01, 13.12it/s]
381
 
 
382
  91%|█████████ | 155/171 [00:21<00:01, 13.12it/s]
383
 
 
384
  91%|█████████ | 155/171 [00:21<00:01, 13.12it/s]
385
 
 
386
  91%|█████████ | 155/171 [00:21<00:01, 13.12it/s]
387
  92%|█████████▏| 158/171 [00:21<00:00, 15.60it/s]
388
 
 
389
  92%|█████████▏| 158/171 [00:21<00:00, 15.60it/s]
390
 
 
391
  92%|█████████▏| 158/171 [00:22<00:00, 15.60it/s]
392
  94%|█████████▎| 160/171 [00:22<00:01, 10.41it/s]
393
 
 
394
  94%|█████████▎| 160/171 [00:22<00:01, 10.41it/s]
395
 
 
396
  94%|█████████▎| 160/171 [00:22<00:01, 10.41it/s]
397
  95%|█████████▍| 162/171 [00:22<00:00, 11.89it/s]
398
 
 
399
  95%|█████████▍| 162/171 [00:22<00:00, 11.89it/s]
400
 
 
401
  95%|█████████▍| 162/171 [00:22<00:00, 11.89it/s]
402
 
 
403
  95%|█████████▍| 162/171 [00:22<00:00, 11.89it/s]
404
  96%|█████████▋| 165/171 [00:22<00:00, 14.35it/s]
405
 
 
406
  96%|█████████▋| 165/171 [00:22<00:00, 14.35it/s]
407
 
 
408
  96%|█████████▋| 165/171 [00:22<00:00, 14.35it/s]
409
  98%|█████████▊| 167/171 [00:22<00:00, 9.14it/s]
410
 
 
411
  98%|█████████▊| 167/171 [00:22<00:00, 9.14it/s]
412
 
 
413
  98%|█████████▊| 167/171 [00:22<00:00, 9.14it/s]
414
  99%|█████████▉| 169/171 [00:22<00:00, 10.65it/s]
415
 
 
416
  99%|█████████▉| 169/171 [00:22<00:00, 10.65it/s]
417
 
 
418
  99%|█████████▉| 169/171 [00:22<00:00, 10.65it/s]
 
 
 
 
 
 
 
 
 
 
1
+ /opt/conda/envs/py310/bin/python -m mlc_llm gen_config /models/Qwen1.5-1.8B-Chat --quantization q0f16 --conv-template chatml --output /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC
2
+ [2024-06-04 03:34:12] INFO auto_config.py:116: Found model configuration: /models/Qwen1.5-1.8B-Chat/config.json
3
+ [2024-06-04 03:34:12] INFO auto_config.py:154: Found model type: qwen2. Use `--model-type` to override.
4
+ [2024-06-04 03:34:12] INFO qwen2_model.py:49: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
5
+ [2024-06-04 03:34:12] INFO qwen2_model.py:66: prefill_chunk_size defaults to 2048
6
+ [2024-06-04 03:34:12] INFO config.py:107: Overriding max_batch_size from 1 to 80
7
+ [2024-06-04 03:34:12] INFO gen_config.py:143: [generation_config.json] Setting bos_token_id: 151643
8
+ [2024-06-04 03:34:12] INFO gen_config.py:143: [generation_config.json] Setting pad_token_id: 151643
9
+ [2024-06-04 03:34:12] INFO gen_config.py:143: [generation_config.json] Setting eos_token_id: [151645, 151643]
10
+ [2024-06-04 03:34:12] INFO gen_config.py:143: [generation_config.json] Setting repetition_penalty: 1.1
11
+ [2024-06-04 03:34:12] INFO gen_config.py:143: [generation_config.json] Setting top_p: 0.8
12
+ [2024-06-04 03:34:12] INFO gen_config.py:157: Not found tokenizer config: /models/Qwen1.5-1.8B-Chat/tokenizer.model
13
+ [2024-06-04 03:34:12] INFO gen_config.py:155: Found tokenizer config: /models/Qwen1.5-1.8B-Chat/tokenizer.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC/tokenizer.json
14
+ [2024-06-04 03:34:12] INFO gen_config.py:155: Found tokenizer config: /models/Qwen1.5-1.8B-Chat/vocab.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC/vocab.json
15
+ [2024-06-04 03:34:12] INFO gen_config.py:155: Found tokenizer config: /models/Qwen1.5-1.8B-Chat/merges.txt. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC/merges.txt
16
+ [2024-06-04 03:34:12] INFO gen_config.py:157: Not found tokenizer config: /models/Qwen1.5-1.8B-Chat/added_tokens.json
17
+ [2024-06-04 03:34:12] INFO gen_config.py:155: Found tokenizer config: /models/Qwen1.5-1.8B-Chat/tokenizer_config.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC/tokenizer_config.json
18
+ [2024-06-04 03:34:12] INFO gen_config.py:216: Detected tokenizer info: {'token_postproc_method': 'byte_level', 'prepend_space_in_encode': False, 'strip_space_in_decode': False}
19
+ [2024-06-04 03:34:12] INFO gen_config.py:32: [System default] Setting temperature: 1.0
20
+ [2024-06-04 03:34:12] INFO gen_config.py:32: [System default] Setting presence_penalty: 0.0
21
+ [2024-06-04 03:34:12] INFO gen_config.py:32: [System default] Setting frequency_penalty: 0.0
22
+ [2024-06-04 03:34:12] INFO gen_config.py:32: [System default] Setting mean_gen_len: 128
23
+ [2024-06-04 03:34:12] INFO gen_config.py:32: [System default] Setting max_gen_len: 512
24
+ [2024-06-04 03:34:12] INFO gen_config.py:32: [System default] Setting shift_fill_factor: 0.3
25
+ [2024-06-04 03:34:12] INFO gen_config.py:223: Dumping configuration file to: /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC/mlc-chat-config.json
26
+ /opt/conda/envs/py310/bin/python -m mlc_llm convert_weight /models/Qwen1.5-1.8B-Chat --quantization q0f16 --output /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC
27
+ [2024-06-04 03:34:14] INFO auto_config.py:116: Found model configuration: /models/Qwen1.5-1.8B-Chat/config.json
28
+ [2024-06-04 03:34:15] INFO auto_device.py:79: Found device: cuda:0
29
+ [2024-06-04 03:34:17] INFO auto_device.py:88: Not found device: rocm:0
30
+ [2024-06-04 03:34:18] INFO auto_device.py:88: Not found device: metal:0
31
+ [2024-06-04 03:34:20] INFO auto_device.py:79: Found device: vulkan:0
32
+ [2024-06-04 03:34:20] INFO auto_device.py:79: Found device: vulkan:1
33
+ [2024-06-04 03:34:20] INFO auto_device.py:79: Found device: vulkan:2
34
+ [2024-06-04 03:34:20] INFO auto_device.py:79: Found device: vulkan:3
35
+ [2024-06-04 03:34:21] INFO auto_device.py:88: Not found device: opencl:0
36
+ [2024-06-04 03:34:21] INFO auto_device.py:35: Using device: cuda:0
37
+ [2024-06-04 03:34:21] INFO auto_weight.py:71: Finding weights in: /models/Qwen1.5-1.8B-Chat
38
+ [2024-06-04 03:34:21] INFO auto_weight.py:137: Not found Huggingface PyTorch
39
+ [2024-06-04 03:34:21] INFO auto_weight.py:144: Found source weight format: huggingface-safetensor. Source configuration: /models/Qwen1.5-1.8B-Chat/model.safetensors.index.json
40
+ [2024-06-04 03:34:21] INFO auto_weight.py:107: Using source weight configuration: /models/Qwen1.5-1.8B-Chat/model.safetensors.index.json. Use `--source` to override.
41
+ [2024-06-04 03:34:21] INFO auto_weight.py:111: Using source weight format: huggingface-safetensor. Use `--source-format` to override.
42
+ [2024-06-04 03:34:21] INFO auto_config.py:154: Found model type: qwen2. Use `--model-type` to override.
43
+ [2024-06-04 03:34:21] INFO qwen2_model.py:49: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
44
+ [2024-06-04 03:34:21] INFO qwen2_model.py:66: prefill_chunk_size defaults to 2048
45
+ Weight conversion with arguments:
46
+ --config /models/Qwen1.5-1.8B-Chat/config.json
47
+ --quantization NoQuantize(name='q0f16', kind='no-quant', model_dtype='float16')
48
+ --model-type qwen2
49
+ --device cuda:0
50
+ --source /models/Qwen1.5-1.8B-Chat/model.safetensors.index.json
51
+ --source-format huggingface-safetensor
52
+ --output /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC
53
+ Start storing to cache /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC
54
+
55
  0%| | 0/171 [00:00<?, ?it/s]
56
 
57
+
58
  0%| | 0/171 [00:00<?, ?it/s]
59
 
60
+
61
  0%| | 0/171 [00:05<?, ?it/s]
62
  1%| | 1/171 [00:07<20:15, 7.15s/it]
63
 
64
+
65
  1%| | 1/171 [00:09<20:15, 7.15s/it]
66
  1%| | 2/171 [00:10<14:39, 5.20s/it]
67
 
68
+
69
  1%| | 2/171 [00:10<14:39, 5.20s/it]
70
 
71
+
72
  1%| | 2/171 [00:11<14:39, 5.20s/it]
73
  2%|▏ | 4/171 [00:11<05:29, 1.98s/it]
74
 
75
+
76
  2%|▏ | 4/171 [00:11<05:29, 1.98s/it]
77
  3%|▎ | 5/171 [00:11<03:57, 1.43s/it]
78
 
79
+
80
  3%|▎ | 5/171 [00:11<03:57, 1.43s/it]
81
 
82
+
83
  3%|▎ | 5/171 [00:11<03:57, 1.43s/it]
84
 
85
+
86
  3%|▎ | 5/171 [00:11<03:57, 1.43s/it]
87
  5%|▍ | 8/171 [00:11<01:44, 1.56it/s]
88
 
89
+
90
  5%|▍ | 8/171 [00:11<01:44, 1.56it/s]
91
 
92
+
93
  5%|▍ | 8/171 [00:11<01:44, 1.56it/s]
94
 
95
+
96
  5%|▍ | 8/171 [00:11<01:44, 1.56it/s]
97
  6%|▋ | 11/171 [00:11<00:58, 2.71it/s]
98
 
99
+
100
  6%|▋ | 11/171 [00:11<00:58, 2.71it/s]
101
 
102
+
103
  6%|▋ | 11/171 [00:11<00:58, 2.71it/s]
104
  8%|▊ | 13/171 [00:11<00:45, 3.44it/s]
105
 
106
+
107
  8%|▊ | 13/171 [00:11<00:45, 3.44it/s]
108
 
109
+
110
  8%|▊ | 13/171 [00:11<00:45, 3.44it/s]
111
  9%|▉ | 15/171 [00:11<00:34, 4.54it/s]
112
 
113
+
114
  9%|▉ | 15/171 [00:11<00:34, 4.54it/s]
115
 
116
+
117
  9%|▉ | 15/171 [00:11<00:34, 4.54it/s]
118
 
119
+
120
  9%|▉ | 15/171 [00:11<00:34, 4.54it/s]
121
  11%|█ | 18/171 [00:12<00:23, 6.57it/s]
122
 
123
+
124
  11%|█ | 18/171 [00:12<00:23, 6.57it/s]
125
 
126
+
127
  11%|█ | 18/171 [00:12<00:23, 6.57it/s]
128
  12%|█▏ | 20/171 [00:12<00:21, 7.11it/s]
129
 
130
+
131
  12%|█▏ | 20/171 [00:12<00:21, 7.11it/s]
132
 
133
+
134
  12%|█▏ | 20/171 [00:12<00:21, 7.11it/s]
135
  13%|█▎ | 22/171 [00:12<00:17, 8.63it/s]
136
 
137
+
138
  13%|█▎ | 22/171 [00:12<00:17, 8.63it/s]
139
 
140
+
141
  13%|█▎ | 22/171 [00:12<00:17, 8.63it/s]
142
 
143
+
144
  13%|█▎ | 22/171 [00:12<00:17, 8.63it/s]
145
  15%|█▍ | 25/171 [00:12<00:13, 11.19it/s]
146
 
147
+
148
  15%|█▍ | 25/171 [00:12<00:13, 11.19it/s]
149
 
150
+
151
  15%|█▍ | 25/171 [00:12<00:13, 11.19it/s]
152
  16%|█▌ | 27/171 [00:12<00:13, 10.56it/s]
153
 
154
+
155
  16%|█▌ | 27/171 [00:12<00:13, 10.56it/s]
156
 
157
+
158
  16%|█▌ | 27/171 [00:12<00:13, 10.56it/s]
159
  17%|█▋ | 29/171 [00:12<00:11, 12.06it/s]
160
 
161
+
162
  17%|█▋ | 29/171 [00:12<00:11, 12.06it/s]
163
 
164
+
165
  17%|█▋ | 29/171 [00:12<00:11, 12.06it/s]
166
 
167
+
168
  17%|█▋ | 29/171 [00:12<00:11, 12.06it/s]
169
  19%|█▊ | 32/171 [00:12<00:09, 14.52it/s]
170
 
171
+
172
  19%|█▊ | 32/171 [00:13<00:09, 14.52it/s]
173
 
174
+
175
  19%|█▊ | 32/171 [00:13<00:09, 14.52it/s]
176
  20%|█▉ | 34/171 [00:13<00:10, 12.58it/s]
177
 
178
+
179
  20%|█▉ | 34/171 [00:13<00:10, 12.58it/s]
180
 
181
+
182
  20%|█▉ | 34/171 [00:13<00:10, 12.58it/s]
183
  21%|██ | 36/171 [00:13<00:09, 13.89it/s]
184
 
185
+
186
  21%|██ | 36/171 [00:13<00:09, 13.89it/s]
187
 
188
+
189
  21%|██ | 36/171 [00:13<00:09, 13.89it/s]
190
 
191
+
192
  21%|██ | 36/171 [00:13<00:09, 13.89it/s]
193
  23%|██▎ | 39/171 [00:13<00:08, 16.13it/s]
194
 
195
+
196
  23%|██▎ | 39/171 [00:13<00:08, 16.13it/s]
197
 
198
+
199
  23%|██▎ | 39/171 [00:13<00:08, 16.13it/s]
200
  24%|██▍ | 41/171 [00:13<00:09, 13.54it/s]
201
 
202
+
203
  24%|██▍ | 41/171 [00:13<00:09, 13.54it/s]
204
 
205
+
206
  24%|██▍ | 41/171 [00:13<00:09, 13.54it/s]
207
  25%|██▌ | 43/171 [00:13<00:08, 14.73it/s]
208
 
209
+
210
  25%|██▌ | 43/171 [00:13<00:08, 14.73it/s]
211
 
212
+
213
  25%|██▌ | 43/171 [00:13<00:08, 14.73it/s]
214
 
215
+
216
  25%|██▌ | 43/171 [00:13<00:08, 14.73it/s]
217
  27%|██▋ | 46/171 [00:13<00:07, 16.83it/s]
218
 
219
+
220
  27%|██▋ | 46/171 [00:13<00:07, 16.83it/s]
221
 
222
+
223
  27%|██▋ | 46/171 [00:14<00:07, 16.83it/s]
224
  28%|██▊ | 48/171 [00:14<00:08, 14.08it/s]
225
 
226
+
227
  28%|██▊ | 48/171 [00:14<00:08, 14.08it/s]
228
 
229
+
230
  28%|██▊ | 48/171 [00:14<00:08, 14.08it/s]
231
  29%|██▉ | 50/171 [00:14<00:07, 15.24it/s]
232
 
233
+
234
  29%|██▉ | 50/171 [00:14<00:07, 15.24it/s]
235
 
236
+
237
  29%|██▉ | 50/171 [00:14<00:07, 15.24it/s]
238
 
239
+
240
  29%|██▉ | 50/171 [00:14<00:07, 15.24it/s]
241
  31%|███ | 53/171 [00:14<00:06, 17.29it/s]
242
 
243
+
244
  31%|███ | 53/171 [00:14<00:06, 17.29it/s]
245
 
246
+
247
  31%|███ | 53/171 [00:14<00:06, 17.29it/s]
248
  32%|███▏ | 55/171 [00:14<00:08, 14.41it/s]
249
 
250
+
251
  32%|███▏ | 55/171 [00:14<00:08, 14.41it/s]
252
 
253
+
254
  32%|███▏ | 55/171 [00:14<00:08, 14.41it/s]
255
  33%|███▎ | 57/171 [00:14<00:07, 15.56it/s]
256
 
257
+
258
  33%|███▎ | 57/171 [00:14<00:07, 15.56it/s]
259
 
260
+
261
  33%|███▎ | 57/171 [00:14<00:07, 15.56it/s]
262
 
263
+
264
  33%|███▎ | 57/171 [00:14<00:07, 15.56it/s]
265
  35%|███▌ | 60/171 [00:14<00:06, 17.64it/s]
266
 
267
+
268
  35%|███▌ | 60/171 [00:14<00:06, 17.64it/s]
269
 
270
+
271
  35%|███▌ | 60/171 [00:14<00:06, 17.64it/s]
272
  36%|███▋ | 62/171 [00:14<00:07, 14.59it/s]
273
 
274
+
275
  36%|███▋ | 62/171 [00:14<00:07, 14.59it/s]
276
 
277
+
278
  36%|███▋ | 62/171 [00:14<00:07, 14.59it/s]
279
  37%|███▋ | 64/171 [00:15<00:06, 15.72it/s]
280
 
281
+
282
  37%|███▋ | 64/171 [00:15<00:06, 15.72it/s]
283
 
284
+
285
  37%|███▋ | 64/171 [00:15<00:06, 15.72it/s]
286
 
287
+
288
  37%|███▋ | 64/171 [00:15<00:06, 15.72it/s]
289
  39%|███▉ | 67/171 [00:15<00:05, 17.79it/s]
290
 
291
+
292
  39%|███▉ | 67/171 [00:15<00:05, 17.79it/s]
293
 
294
+
295
  39%|███▉ | 67/171 [00:15<00:05, 17.79it/s]
296
  40%|████ | 69/171 [00:15<00:06, 14.67it/s]
297
 
298
+
299
  40%|████ | 69/171 [00:15<00:06, 14.67it/s]
300
 
301
+
302
  40%|████ | 69/171 [00:15<00:06, 14.67it/s]
303
  42%|████▏ | 71/171 [00:15<00:06, 15.80it/s]
304
 
305
+
306
  42%|████▏ | 71/171 [00:15<00:06, 15.80it/s]
307
 
308
+
309
  42%|████▏ | 71/171 [00:15<00:06, 15.80it/s]
310
 
311
+
312
  42%|████▏ | 71/171 [00:15<00:06, 15.80it/s]
313
  43%|████▎ | 74/171 [00:15<00:05, 17.85it/s]
314
 
315
+
316
  43%|████▎ | 74/171 [00:15<00:05, 17.85it/s]
317
 
318
+
319
  43%|████▎ | 74/171 [00:15<00:05, 17.85it/s]
320
  44%|████▍ | 76/171 [00:15<00:06, 14.73it/s]
321
 
322
+
323
  44%|████▍ | 76/171 [00:15<00:06, 14.73it/s]
324
 
325
+
326
  44%|████▍ | 76/171 [00:15<00:06, 14.73it/s]
327
  46%|████▌ | 78/171 [00:15<00:05, 15.82it/s]
328
 
329
+
330
  46%|████▌ | 78/171 [00:15<00:05, 15.82it/s]
331
 
332
+
333
  46%|████▌ | 78/171 [00:15<00:05, 15.82it/s]
334
 
335
+
336
  46%|████▌ | 78/171 [00:16<00:05, 15.82it/s]
337
  47%|████▋ | 81/171 [00:16<00:05, 17.86it/s]
338
 
339
+
340
  47%|████▋ | 81/171 [00:16<00:05, 17.86it/s]
341
 
342
+
343
  47%|████▋ | 81/171 [00:16<00:05, 17.86it/s]
344
  49%|████▊ | 83/171 [00:16<00:05, 14.72it/s]
345
 
346
+
347
  49%|████▊ | 83/171 [00:16<00:05, 14.72it/s]
348
 
349
+
350
  49%|████▊ | 83/171 [00:16<00:05, 14.72it/s]
351
  50%|████▉ | 85/171 [00:16<00:05, 15.83it/s]
352
 
353
+
354
  50%|████▉ | 85/171 [00:16<00:05, 15.83it/s]
355
 
356
+
357
  50%|████▉ | 85/171 [00:16<00:05, 15.83it/s]
358
 
359
+
360
  50%|████▉ | 85/171 [00:16<00:05, 15.83it/s]
361
  51%|█████▏ | 88/171 [00:16<00:04, 17.89it/s]
362
 
363
+
364
  51%|█████▏ | 88/171 [00:16<00:04, 17.89it/s]
365
 
366
+
367
  51%|█████▏ | 88/171 [00:16<00:04, 17.89it/s]
368
  53%|█████▎ | 90/171 [00:16<00:05, 14.73it/s]
369
 
370
+
371
  53%|█████▎ | 90/171 [00:16<00:05, 14.73it/s]
372
 
373
+
374
  53%|█████▎ | 90/171 [00:16<00:05, 14.73it/s]
375
  54%|█████▍ | 92/171 [00:16<00:05, 15.76it/s]
376
 
377
+
378
  54%|█████▍ | 92/171 [00:16<00:05, 15.76it/s]
379
 
380
+
381
  54%|█████▍ | 92/171 [00:16<00:05, 15.76it/s]
382
 
383
+
384
  54%|█████▍ | 92/171 [00:16<00:05, 15.76it/s]
385
  56%|█████▌ | 95/171 [00:16<00:04, 17.86it/s]
386
 
387
+
388
  56%|█████▌ | 95/171 [00:16<00:04, 17.86it/s]
389
 
390
+
391
  56%|█████▌ | 95/171 [00:17<00:04, 17.86it/s]
392
  57%|█████▋ | 97/171 [00:17<00:05, 14.78it/s]
393
 
394
+
395
  57%|█████▋ | 97/171 [00:17<00:05, 14.78it/s]
396
 
397
+
398
  57%|█████▋ | 97/171 [00:17<00:05, 14.78it/s]
399
  58%|█████▊ | 99/171 [00:17<00:04, 15.87it/s]
400
 
401
+
402
  58%|█████▊ | 99/171 [00:17<00:04, 15.87it/s]
403
 
404
+
405
  58%|█████▊ | 99/171 [00:17<00:04, 15.87it/s]
406
 
407
+
408
  58%|█████▊ | 99/171 [00:17<00:04, 15.87it/s]
409
  60%|█████▉ | 102/171 [00:17<00:03, 17.97it/s]
410
 
411
+
412
  60%|█████▉ | 102/171 [00:17<00:03, 17.97it/s]
413
 
414
+
415
  60%|█████▉ | 102/171 [00:17<00:03, 17.97it/s]
416
  61%|██████ | 104/171 [00:17<00:04, 14.77it/s]
417
 
418
+
419
  61%|██████ | 104/171 [00:17<00:04, 14.77it/s]
420
 
421
+
422
  61%|██████ | 104/171 [00:17<00:04, 14.77it/s]
423
  62%|██████▏ | 106/171 [00:17<00:04, 15.85it/s]
424
 
425
+
426
  62%|██████▏ | 106/171 [00:17<00:04, 15.85it/s]
427
 
428
+
429
  62%|██████▏ | 106/171 [00:17<00:04, 15.85it/s]
430
 
431
+
432
  62%|██████▏ | 106/171 [00:17<00:04, 15.85it/s]
433
  64%|██████▎ | 109/171 [00:17<00:03, 17.50it/s]
434
 
435
+
436
  64%|██████▎ | 109/171 [00:18<00:03, 17.50it/s]
437
 
438
+
439
  64%|██████▎ | 109/171 [00:18<00:03, 17.50it/s]
440
  65%|██████▍ | 111/171 [00:18<00:05, 11.52it/s]
441
 
442
+
443
  65%|██████▍ | 111/171 [00:18<00:05, 11.52it/s]
444
 
445
+
446
  65%|██████▍ | 111/171 [00:18<00:05, 11.52it/s]
447
  66%|██████▌ | 113/171 [00:18<00:04, 12.95it/s]
448
 
449
+
450
  66%|██████▌ | 113/171 [00:18<00:04, 12.95it/s]
451
 
452
+
453
  66%|██████▌ | 113/171 [00:18<00:04, 12.95it/s]
454
 
455
+
456
  66%|██████▌ | 113/171 [00:18<00:04, 12.95it/s]
457
  68%|██████▊ | 116/171 [00:18<00:03, 15.50it/s]
458
 
459
+
460
  68%|██████▊ | 116/171 [00:18<00:03, 15.50it/s]
461
 
462
+
463
  68%|██████▊ | 116/171 [00:18<00:03, 15.50it/s]
464
  69%|██████▉ | 118/171 [00:18<00:04, 11.18it/s]
465
 
466
+
467
  69%|██████▉ | 118/171 [00:18<00:04, 11.18it/s]
468
 
469
+
470
  69%|██████▉ | 118/171 [00:18<00:04, 11.18it/s]
471
  70%|███████ | 120/171 [00:18<00:04, 12.65it/s]
472
 
473
+
474
  70%|███████ | 120/171 [00:18<00:04, 12.65it/s]
475
 
476
+
477
  70%|███████ | 120/171 [00:18<00:04, 12.65it/s]
478
 
479
+
480
  70%|███████ | 120/171 [00:18<00:04, 12.65it/s]
481
  72%|███████▏ | 123/171 [00:18<00:03, 15.18it/s]
482
 
483
+
484
  72%|███████▏ | 123/171 [00:19<00:03, 15.18it/s]
485
 
486
+
487
  72%|███████▏ | 123/171 [00:19<00:03, 15.18it/s]
488
  73%|███████▎ | 125/171 [00:19<00:04, 11.25it/s]
489
 
490
+
491
  73%|███████▎ | 125/171 [00:19<00:04, 11.25it/s]
492
 
493
+
494
  73%|███████▎ | 125/171 [00:19<00:04, 11.25it/s]
495
  74%|███████▍ | 127/171 [00:19<00:03, 12.38it/s]
496
 
497
+
498
  74%|███████▍ | 127/171 [00:19<00:03, 12.38it/s]
499
 
500
+
501
  74%|███████▍ | 127/171 [00:19<00:03, 12.38it/s]
502
 
503
+
504
  74%|███████▍ | 127/171 [00:19<00:03, 12.38it/s]
505
  76%|███████▌ | 130/171 [00:19<00:02, 14.83it/s]
506
 
507
+
508
  76%|███████▌ | 130/171 [00:19<00:02, 14.83it/s]
509
 
510
+
511
  76%|███████▌ | 130/171 [00:19<00:02, 14.83it/s]
512
  77%|███████▋ | 132/171 [00:19<00:04, 9.01it/s]
513
 
514
+
515
  77%|███████▋ | 132/171 [00:19<00:04, 9.01it/s]
516
 
517
+
518
  77%|███████▋ | 132/171 [00:19<00:04, 9.01it/s]
519
  78%|███████▊ | 134/171 [00:20<00:03, 10.52it/s]
520
 
521
+
522
  78%|███████▊ | 134/171 [00:20<00:03, 10.52it/s]
523
 
524
+
525
  78%|███████▊ | 134/171 [00:20<00:03, 10.52it/s]
526
 
527
+
528
  78%|███████▊ | 134/171 [00:20<00:03, 10.52it/s]
529
  80%|████████ | 137/171 [00:20<00:02, 13.07it/s]
530
 
531
+
532
  80%|████████ | 137/171 [00:20<00:02, 13.07it/s]
533
 
534
+
535
  80%|████████ | 137/171 [00:20<00:02, 13.07it/s]
536
  81%|████████▏ | 139/171 [00:20<00:03, 10.27it/s]
537
 
538
+
539
  81%|████████▏ | 139/171 [00:20<00:03, 10.27it/s]
540
 
541
+
542
  81%|████████▏ | 139/171 [00:20<00:03, 10.27it/s]
543
  82%|████████▏ | 141/171 [00:20<00:02, 11.76it/s]
544
 
545
+
546
  82%|████████▏ | 141/171 [00:20<00:02, 11.76it/s]
547
 
548
+
549
  82%|████████▏ | 141/171 [00:20<00:02, 11.76it/s]
550
 
551
+
552
  82%|████████▏ | 141/171 [00:20<00:02, 11.76it/s]
553
  84%|████████▍ | 144/171 [00:20<00:01, 14.27it/s]
554
 
555
+
556
  84%|████████▍ | 144/171 [00:20<00:01, 14.27it/s]
557
 
558
+
559
  84%|████████▍ | 144/171 [00:20<00:01, 14.27it/s]
560
  85%|████████▌ | 146/171 [00:20<00:02, 11.77it/s]
561
 
562
+
563
  85%|████████▌ | 146/171 [00:20<00:02, 11.77it/s]
564
 
565
+
566
  85%|████████▌ | 146/171 [00:21<00:02, 11.77it/s]
567
  87%|████████▋ | 148/171 [00:21<00:01, 13.04it/s]
568
 
569
+
570
  87%|████████▋ | 148/171 [00:21<00:01, 13.04it/s]
571
 
572
+
573
  87%|████████▋ | 148/171 [00:21<00:01, 13.04it/s]
574
 
575
+
576
  87%|████████▋ | 148/171 [00:21<00:01, 13.04it/s]
577
  88%|████████▊ | 151/171 [00:21<00:01, 15.45it/s]
578
 
579
+
580
  88%|████████▊ | 151/171 [00:21<00:01, 15.45it/s]
581
 
582
+
583
  88%|████████▊ | 151/171 [00:21<00:01, 15.45it/s]
584
  89%|████████▉ | 153/171 [00:21<00:01, 11.67it/s]
585
 
586
+
587
  89%|████████▉ | 153/171 [00:21<00:01, 11.67it/s]
588
 
589
+
590
  89%|████████▉ | 153/171 [00:21<00:01, 11.67it/s]
591
  91%|█████████ | 155/171 [00:21<00:01, 13.12it/s]
592
 
593
+
594
  91%|█████████ | 155/171 [00:21<00:01, 13.12it/s]
595
 
596
+
597
  91%|█████████ | 155/171 [00:21<00:01, 13.12it/s]
598
 
599
+
600
  91%|█████████ | 155/171 [00:21<00:01, 13.12it/s]
601
  92%|█████████▏| 158/171 [00:21<00:00, 15.60it/s]
602
 
603
+
604
  92%|█████████▏| 158/171 [00:21<00:00, 15.60it/s]
605
 
606
+
607
  92%|█████████▏| 158/171 [00:22<00:00, 15.60it/s]
608
  94%|█████████▎| 160/171 [00:22<00:01, 10.41it/s]
609
 
610
+
611
  94%|█████████▎| 160/171 [00:22<00:01, 10.41it/s]
612
 
613
+
614
  94%|█████████▎| 160/171 [00:22<00:01, 10.41it/s]
615
  95%|█████████▍| 162/171 [00:22<00:00, 11.89it/s]
616
 
617
+
618
  95%|█████████▍| 162/171 [00:22<00:00, 11.89it/s]
619
 
620
+
621
  95%|█████████▍| 162/171 [00:22<00:00, 11.89it/s]
622
 
623
+
624
  95%|█████████▍| 162/171 [00:22<00:00, 11.89it/s]
625
  96%|█████████▋| 165/171 [00:22<00:00, 14.35it/s]
626
 
627
+
628
  96%|█████████▋| 165/171 [00:22<00:00, 14.35it/s]
629
 
630
+
631
  96%|█████████▋| 165/171 [00:22<00:00, 14.35it/s]
632
  98%|█████████▊| 167/171 [00:22<00:00, 9.14it/s]
633
 
634
+
635
  98%|█████████▊| 167/171 [00:22<00:00, 9.14it/s]
636
 
637
+
638
  98%|█████████▊| 167/171 [00:22<00:00, 9.14it/s]
639
  99%|█████████▉| 169/171 [00:22<00:00, 10.65it/s]
640
 
641
+
642
  99%|█████████▉| 169/171 [00:22<00:00, 10.65it/s]
643
 
644
+
645
  99%|█████████▉| 169/171 [00:22<00:00, 10.65it/s]
646
+ [2024-06-04 03:34:45] INFO huggingface_loader.py:197: Unloading HF weight file: /models/Qwen1.5-1.8B-Chat/model.safetensors
647
+ [2024-06-04 03:34:45] INFO stats.py:77: Time usage: HF loading: 4.456 sec; Pre-quantization mapping: 8.772 sec; Quantization: 0.000 sec
648
+ [2024-06-04 03:34:45] INFO stats.py:91: RAM usage: Peak RAM: 6.843 GB. Total bytes loaded from disk: 6.843 GB
649
+ [2024-06-04 03:34:45] INFO convert_weight.py:155: Parameter size after quantization: 3.421 GB
650
+ [2024-06-04 03:34:45] INFO convert_weight.py:160: Total parameters: 1,836,828,672
651
+ [2024-06-04 03:34:45] INFO convert_weight.py:161: Bits per parameter: 16.000
652
+ [2024-06-04 03:34:45] INFO convert_weight.py:166: Saved to directory: /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC
653
+
654
+ All finished, 75 total shards committed, record saved to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-1.8B-Chat-q0f16-MLC/ndarray-cache.json
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlc-chat-config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "qwen2",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_act": "silu",
7
+ "hidden_size": 2048,
8
+ "intermediate_size": 5504,
9
+ "num_attention_heads": 16,
10
+ "num_hidden_layers": 24,
11
+ "num_key_value_heads": 16,
12
+ "rms_norm_eps": 1e-06,
13
+ "rope_theta": 1000000.0,
14
+ "vocab_size": 151936,
15
+ "context_window_size": 32768,
16
+ "prefill_chunk_size": 2048,
17
+ "tensor_parallel_shards": 1,
18
+ "head_dim": 128,
19
+ "dtype": "float32",
20
+ "max_batch_size": 80
21
+ },
22
+ "vocab_size": 151936,
23
+ "context_window_size": 32768,
24
+ "sliding_window_size": -1,
25
+ "prefill_chunk_size": 2048,
26
+ "attention_sink_size": -1,
27
+ "tensor_parallel_shards": 1,
28
+ "temperature": 1.0,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.1,
32
+ "top_p": 0.8,
33
+ "tokenizer_files": [
34
+ "tokenizer.json",
35
+ "vocab.json",
36
+ "merges.txt",
37
+ "tokenizer_config.json"
38
+ ],
39
+ "tokenizer_info": {
40
+ "token_postproc_method": "byte_level",
41
+ "prepend_space_in_encode": false,
42
+ "strip_space_in_decode": false
43
+ },
44
+ "conv_template": {
45
+ "name": "chatml",
46
+ "system_template": "<|im_start|>system\n{system_message}",
47
+ "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
48
+ "system_prefix_token_ids": null,
49
+ "add_role_after_system_message": true,
50
+ "roles": {
51
+ "user": "<|im_start|>user",
52
+ "assistant": "<|im_start|>assistant"
53
+ },
54
+ "role_templates": {
55
+ "user": "{user_message}",
56
+ "assistant": "{assistant_message}",
57
+ "tool": "{tool_message}"
58
+ },
59
+ "messages": [],
60
+ "seps": [
61
+ "<|im_end|>\n"
62
+ ],
63
+ "role_content_sep": "\n",
64
+ "role_empty_sep": "\n",
65
+ "stop_str": [
66
+ "<|im_end|>"
67
+ ],
68
+ "stop_token_ids": [
69
+ 2
70
+ ],
71
+ "function_string": "",
72
+ "use_function_calling": false
73
+ },
74
+ "pad_token_id": 151643,
75
+ "bos_token_id": 151643,
76
+ "eos_token_id": [
77
+ 151645,
78
+ 151643
79
+ ],
80
+ "mean_gen_len": 128,
81
+ "max_gen_len": 512,
82
+ "shift_fill_factor": 0.3
83
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 171,
4
+ "ParamBytes": 3673657344.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 622329856,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.weight",
15
+ "shape": [
16
+ 151936,
17
+ 2048
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 622329856,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "83c85f622c9c298d6de5cd6d58789736"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 622329856,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.weight",
34
+ "shape": [
35
+ 151936,
36
+ 2048
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 622329856,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "206ebbfb149647701dbeb218436b7d8f"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 45088768,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 11008,
55
+ 2048
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 45088768,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "09c446e956ab6d4a147bd40f2de7bea5"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 25165824,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.0.self_attn.c_attn.weight",
72
+ "shape": [
73
+ 6144,
74
+ 2048
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 25165824,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "0292eee7b82a5b618c0ccb911837b204"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 22544384,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.1.mlp.down_proj.weight",
91
+ "shape": [
92
+ 2048,
93
+ 5504
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 22544384,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "2bc2c7e7d9bbab5dae35f6341df3d5d4"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 45088768,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
110
+ "shape": [
111
+ 11008,
112
+ 2048
113
+ ],
114
+ "dtype": "float16",
115
+ "format": "f32-to-bf16",
116
+ "nbytes": 45088768,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "223e4f9d1bc3272485ebef2b95d1f1f5"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 25165824,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.1.self_attn.c_attn.weight",
129
+ "shape": [
130
+ 6144,
131
+ 2048
132
+ ],
133
+ "dtype": "float16",
134
+ "format": "f32-to-bf16",
135
+ "nbytes": 25165824,
136
+ "byteOffset": 0
137
+ }
138
+ ],
139
+ "md5sum": "b6d9f3d75d83c86c7a34fabaf5a622dd"
140
+ },
141
+ {
142
+ "dataPath": "params_shard_7.bin",
143
+ "format": "raw-shard",
144
+ "nbytes": 30973952,
145
+ "records": [
146
+ {
147
+ "name": "model.layers.0.input_layernorm.weight",
148
+ "shape": [
149
+ 2048
150
+ ],
151
+ "dtype": "float16",
152
+ "format": "f32-to-bf16",
153
+ "nbytes": 4096,
154
+ "byteOffset": 0
155
+ },
156
+ {
157
+ "name": "model.layers.0.mlp.down_proj.weight",
158
+ "shape": [
159
+ 2048,
160
+ 5504
161
+ ],
162
+ "dtype": "float16",
163
+ "format": "f32-to-bf16",
164
+ "nbytes": 22544384,
165
+ "byteOffset": 4096
166
+ },
167
+ {
168
+ "name": "model.layers.0.post_attention_layernorm.weight",
169
+ "shape": [
170
+ 2048
171
+ ],
172
+ "dtype": "float16",
173
+ "format": "f32-to-bf16",
174
+ "nbytes": 4096,
175
+ "byteOffset": 22548480
176
+ },
177
+ {
178
+ "name": "model.layers.0.self_attn.c_attn.bias",
179
+ "shape": [
180
+ 6144
181
+ ],
182
+ "dtype": "float16",
183
+ "format": "f32-to-bf16",
184
+ "nbytes": 12288,
185
+ "byteOffset": 22552576
186
+ },
187
+ {
188
+ "name": "model.layers.0.self_attn.o_proj.weight",
189
+ "shape": [
190
+ 2048,
191
+ 2048
192
+ ],
193
+ "dtype": "float16",
194
+ "format": "f32-to-bf16",
195
+ "nbytes": 8388608,
196
+ "byteOffset": 22564864
197
+ },
198
+ {
199
+ "name": "model.layers.1.input_layernorm.weight",
200
+ "shape": [
201
+ 2048
202
+ ],
203
+ "dtype": "float16",
204
+ "format": "f32-to-bf16",
205
+ "nbytes": 4096,
206
+ "byteOffset": 30953472
207
+ },
208
+ {
209
+ "name": "model.layers.1.post_attention_layernorm.weight",
210
+ "shape": [
211
+ 2048
212
+ ],
213
+ "dtype": "float16",
214
+ "format": "f32-to-bf16",
215
+ "nbytes": 4096,
216
+ "byteOffset": 30957568
217
+ },
218
+ {
219
+ "name": "model.layers.1.self_attn.c_attn.bias",
220
+ "shape": [
221
+ 6144
222
+ ],
223
+ "dtype": "float16",
224
+ "format": "f32-to-bf16",
225
+ "nbytes": 12288,
226
+ "byteOffset": 30961664
227
+ }
228
+ ],
229
+ "md5sum": "64a2cf6c36a6ef5aaf9da3622d3eb666"
230
+ },
231
+ {
232
+ "dataPath": "params_shard_8.bin",
233
+ "format": "raw-shard",
234
+ "nbytes": 45088768,
235
+ "records": [
236
+ {
237
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
238
+ "shape": [
239
+ 11008,
240
+ 2048
241
+ ],
242
+ "dtype": "float16",
243
+ "format": "f32-to-bf16",
244
+ "nbytes": 45088768,
245
+ "byteOffset": 0
246
+ }
247
+ ],
248
+ "md5sum": "5facbe96fed8417fd67054abece53006"
249
+ },
250
+ {
251
+ "dataPath": "params_shard_9.bin",
252
+ "format": "raw-shard",
253
+ "nbytes": 25165824,
254
+ "records": [
255
+ {
256
+ "name": "model.layers.10.self_attn.c_attn.weight",
257
+ "shape": [
258
+ 6144,
259
+ 2048
260
+ ],
261
+ "dtype": "float16",
262
+ "format": "f32-to-bf16",
263
+ "nbytes": 25165824,
264
+ "byteOffset": 0
265
+ }
266
+ ],
267
+ "md5sum": "1511e9775e7b1fb03f1096d48b60301e"
268
+ },
269
+ {
270
+ "dataPath": "params_shard_10.bin",
271
+ "format": "raw-shard",
272
+ "nbytes": 30953472,
273
+ "records": [
274
+ {
275
+ "name": "model.layers.1.self_attn.o_proj.weight",
276
+ "shape": [
277
+ 2048,
278
+ 2048
279
+ ],
280
+ "dtype": "float16",
281
+ "format": "f32-to-bf16",
282
+ "nbytes": 8388608,
283
+ "byteOffset": 0
284
+ },
285
+ {
286
+ "name": "model.layers.10.input_layernorm.weight",
287
+ "shape": [
288
+ 2048
289
+ ],
290
+ "dtype": "float16",
291
+ "format": "f32-to-bf16",
292
+ "nbytes": 4096,
293
+ "byteOffset": 8388608
294
+ },
295
+ {
296
+ "name": "model.layers.10.mlp.down_proj.weight",
297
+ "shape": [
298
+ 2048,
299
+ 5504
300
+ ],
301
+ "dtype": "float16",
302
+ "format": "f32-to-bf16",
303
+ "nbytes": 22544384,
304
+ "byteOffset": 8392704
305
+ },
306
+ {
307
+ "name": "model.layers.10.post_attention_layernorm.weight",
308
+ "shape": [
309
+ 2048
310
+ ],
311
+ "dtype": "float16",
312
+ "format": "f32-to-bf16",
313
+ "nbytes": 4096,
314
+ "byteOffset": 30937088
315
+ },
316
+ {
317
+ "name": "model.layers.10.self_attn.c_attn.bias",
318
+ "shape": [
319
+ 6144
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 12288,
324
+ "byteOffset": 30941184
325
+ }
326
+ ],
327
+ "md5sum": "801c9302955ade10a6a38eea1cdd663e"
328
+ },
329
+ {
330
+ "dataPath": "params_shard_11.bin",
331
+ "format": "raw-shard",
332
+ "nbytes": 45088768,
333
+ "records": [
334
+ {
335
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
336
+ "shape": [
337
+ 11008,
338
+ 2048
339
+ ],
340
+ "dtype": "float16",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 45088768,
343
+ "byteOffset": 0
344
+ }
345
+ ],
346
+ "md5sum": "79cfed7dc713b299f61b18dee20376dc"
347
+ },
348
+ {
349
+ "dataPath": "params_shard_12.bin",
350
+ "format": "raw-shard",
351
+ "nbytes": 25165824,
352
+ "records": [
353
+ {
354
+ "name": "model.layers.11.self_attn.c_attn.weight",
355
+ "shape": [
356
+ 6144,
357
+ 2048
358
+ ],
359
+ "dtype": "float16",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 25165824,
362
+ "byteOffset": 0
363
+ }
364
+ ],
365
+ "md5sum": "2839deaf8ac8792833e0a170eb677773"
366
+ },
367
+ {
368
+ "dataPath": "params_shard_13.bin",
369
+ "format": "raw-shard",
370
+ "nbytes": 30953472,
371
+ "records": [
372
+ {
373
+ "name": "model.layers.10.self_attn.o_proj.weight",
374
+ "shape": [
375
+ 2048,
376
+ 2048
377
+ ],
378
+ "dtype": "float16",
379
+ "format": "f32-to-bf16",
380
+ "nbytes": 8388608,
381
+ "byteOffset": 0
382
+ },
383
+ {
384
+ "name": "model.layers.11.input_layernorm.weight",
385
+ "shape": [
386
+ 2048
387
+ ],
388
+ "dtype": "float16",
389
+ "format": "f32-to-bf16",
390
+ "nbytes": 4096,
391
+ "byteOffset": 8388608
392
+ },
393
+ {
394
+ "name": "model.layers.11.mlp.down_proj.weight",
395
+ "shape": [
396
+ 2048,
397
+ 5504
398
+ ],
399
+ "dtype": "float16",
400
+ "format": "f32-to-bf16",
401
+ "nbytes": 22544384,
402
+ "byteOffset": 8392704
403
+ },
404
+ {
405
+ "name": "model.layers.11.post_attention_layernorm.weight",
406
+ "shape": [
407
+ 2048
408
+ ],
409
+ "dtype": "float16",
410
+ "format": "f32-to-bf16",
411
+ "nbytes": 4096,
412
+ "byteOffset": 30937088
413
+ },
414
+ {
415
+ "name": "model.layers.11.self_attn.c_attn.bias",
416
+ "shape": [
417
+ 6144
418
+ ],
419
+ "dtype": "float16",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 12288,
422
+ "byteOffset": 30941184
423
+ }
424
+ ],
425
+ "md5sum": "56dbcb95902d10b2165a68fdd689a853"
426
+ },
427
+ {
428
+ "dataPath": "params_shard_14.bin",
429
+ "format": "raw-shard",
430
+ "nbytes": 45088768,
431
+ "records": [
432
+ {
433
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
434
+ "shape": [
435
+ 11008,
436
+ 2048
437
+ ],
438
+ "dtype": "float16",
439
+ "format": "f32-to-bf16",
440
+ "nbytes": 45088768,
441
+ "byteOffset": 0
442
+ }
443
+ ],
444
+ "md5sum": "750ea469ec1790dd12fc5df63245f36c"
445
+ },
446
+ {
447
+ "dataPath": "params_shard_15.bin",
448
+ "format": "raw-shard",
449
+ "nbytes": 25165824,
450
+ "records": [
451
+ {
452
+ "name": "model.layers.12.self_attn.c_attn.weight",
453
+ "shape": [
454
+ 6144,
455
+ 2048
456
+ ],
457
+ "dtype": "float16",
458
+ "format": "f32-to-bf16",
459
+ "nbytes": 25165824,
460
+ "byteOffset": 0
461
+ }
462
+ ],
463
+ "md5sum": "53d7deafe41c7cfd26cdde144f001b84"
464
+ },
465
+ {
466
+ "dataPath": "params_shard_16.bin",
467
+ "format": "raw-shard",
468
+ "nbytes": 30953472,
469
+ "records": [
470
+ {
471
+ "name": "model.layers.11.self_attn.o_proj.weight",
472
+ "shape": [
473
+ 2048,
474
+ 2048
475
+ ],
476
+ "dtype": "float16",
477
+ "format": "f32-to-bf16",
478
+ "nbytes": 8388608,
479
+ "byteOffset": 0
480
+ },
481
+ {
482
+ "name": "model.layers.12.input_layernorm.weight",
483
+ "shape": [
484
+ 2048
485
+ ],
486
+ "dtype": "float16",
487
+ "format": "f32-to-bf16",
488
+ "nbytes": 4096,
489
+ "byteOffset": 8388608
490
+ },
491
+ {
492
+ "name": "model.layers.12.mlp.down_proj.weight",
493
+ "shape": [
494
+ 2048,
495
+ 5504
496
+ ],
497
+ "dtype": "float16",
498
+ "format": "f32-to-bf16",
499
+ "nbytes": 22544384,
500
+ "byteOffset": 8392704
501
+ },
502
+ {
503
+ "name": "model.layers.12.post_attention_layernorm.weight",
504
+ "shape": [
505
+ 2048
506
+ ],
507
+ "dtype": "float16",
508
+ "format": "f32-to-bf16",
509
+ "nbytes": 4096,
510
+ "byteOffset": 30937088
511
+ },
512
+ {
513
+ "name": "model.layers.12.self_attn.c_attn.bias",
514
+ "shape": [
515
+ 6144
516
+ ],
517
+ "dtype": "float16",
518
+ "format": "f32-to-bf16",
519
+ "nbytes": 12288,
520
+ "byteOffset": 30941184
521
+ }
522
+ ],
523
+ "md5sum": "d5ab1c2914b7b5fb9463ae06c17ad62f"
524
+ },
525
+ {
526
+ "dataPath": "params_shard_17.bin",
527
+ "format": "raw-shard",
528
+ "nbytes": 45088768,
529
+ "records": [
530
+ {
531
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
532
+ "shape": [
533
+ 11008,
534
+ 2048
535
+ ],
536
+ "dtype": "float16",
537
+ "format": "f32-to-bf16",
538
+ "nbytes": 45088768,
539
+ "byteOffset": 0
540
+ }
541
+ ],
542
+ "md5sum": "c818bbd34597e1adc930a21bc1281901"
543
+ },
544
+ {
545
+ "dataPath": "params_shard_18.bin",
546
+ "format": "raw-shard",
547
+ "nbytes": 25165824,
548
+ "records": [
549
+ {
550
+ "name": "model.layers.13.self_attn.c_attn.weight",
551
+ "shape": [
552
+ 6144,
553
+ 2048
554
+ ],
555
+ "dtype": "float16",
556
+ "format": "f32-to-bf16",
557
+ "nbytes": 25165824,
558
+ "byteOffset": 0
559
+ }
560
+ ],
561
+ "md5sum": "8cb5f8dc0b1df94cdc4758972b8bcb79"
562
+ },
563
+ {
564
+ "dataPath": "params_shard_19.bin",
565
+ "format": "raw-shard",
566
+ "nbytes": 30953472,
567
+ "records": [
568
+ {
569
+ "name": "model.layers.12.self_attn.o_proj.weight",
570
+ "shape": [
571
+ 2048,
572
+ 2048
573
+ ],
574
+ "dtype": "float16",
575
+ "format": "f32-to-bf16",
576
+ "nbytes": 8388608,
577
+ "byteOffset": 0
578
+ },
579
+ {
580
+ "name": "model.layers.13.input_layernorm.weight",
581
+ "shape": [
582
+ 2048
583
+ ],
584
+ "dtype": "float16",
585
+ "format": "f32-to-bf16",
586
+ "nbytes": 4096,
587
+ "byteOffset": 8388608
588
+ },
589
+ {
590
+ "name": "model.layers.13.mlp.down_proj.weight",
591
+ "shape": [
592
+ 2048,
593
+ 5504
594
+ ],
595
+ "dtype": "float16",
596
+ "format": "f32-to-bf16",
597
+ "nbytes": 22544384,
598
+ "byteOffset": 8392704
599
+ },
600
+ {
601
+ "name": "model.layers.13.post_attention_layernorm.weight",
602
+ "shape": [
603
+ 2048
604
+ ],
605
+ "dtype": "float16",
606
+ "format": "f32-to-bf16",
607
+ "nbytes": 4096,
608
+ "byteOffset": 30937088
609
+ },
610
+ {
611
+ "name": "model.layers.13.self_attn.c_attn.bias",
612
+ "shape": [
613
+ 6144
614
+ ],
615
+ "dtype": "float16",
616
+ "format": "f32-to-bf16",
617
+ "nbytes": 12288,
618
+ "byteOffset": 30941184
619
+ }
620
+ ],
621
+ "md5sum": "81ea46aba7c4040e488eb1f53eb85395"
622
+ },
623
+ {
624
+ "dataPath": "params_shard_20.bin",
625
+ "format": "raw-shard",
626
+ "nbytes": 45088768,
627
+ "records": [
628
+ {
629
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
630
+ "shape": [
631
+ 11008,
632
+ 2048
633
+ ],
634
+ "dtype": "float16",
635
+ "format": "f32-to-bf16",
636
+ "nbytes": 45088768,
637
+ "byteOffset": 0
638
+ }
639
+ ],
640
+ "md5sum": "c3fe540355374d554e380b67b9130e6d"
641
+ },
642
+ {
643
+ "dataPath": "params_shard_21.bin",
644
+ "format": "raw-shard",
645
+ "nbytes": 25165824,
646
+ "records": [
647
+ {
648
+ "name": "model.layers.14.self_attn.c_attn.weight",
649
+ "shape": [
650
+ 6144,
651
+ 2048
652
+ ],
653
+ "dtype": "float16",
654
+ "format": "f32-to-bf16",
655
+ "nbytes": 25165824,
656
+ "byteOffset": 0
657
+ }
658
+ ],
659
+ "md5sum": "6acccdb7b8091022a954b0629df7a8d7"
660
+ },
661
+ {
662
+ "dataPath": "params_shard_22.bin",
663
+ "format": "raw-shard",
664
+ "nbytes": 30953472,
665
+ "records": [
666
+ {
667
+ "name": "model.layers.13.self_attn.o_proj.weight",
668
+ "shape": [
669
+ 2048,
670
+ 2048
671
+ ],
672
+ "dtype": "float16",
673
+ "format": "f32-to-bf16",
674
+ "nbytes": 8388608,
675
+ "byteOffset": 0
676
+ },
677
+ {
678
+ "name": "model.layers.14.input_layernorm.weight",
679
+ "shape": [
680
+ 2048
681
+ ],
682
+ "dtype": "float16",
683
+ "format": "f32-to-bf16",
684
+ "nbytes": 4096,
685
+ "byteOffset": 8388608
686
+ },
687
+ {
688
+ "name": "model.layers.14.mlp.down_proj.weight",
689
+ "shape": [
690
+ 2048,
691
+ 5504
692
+ ],
693
+ "dtype": "float16",
694
+ "format": "f32-to-bf16",
695
+ "nbytes": 22544384,
696
+ "byteOffset": 8392704
697
+ },
698
+ {
699
+ "name": "model.layers.14.post_attention_layernorm.weight",
700
+ "shape": [
701
+ 2048
702
+ ],
703
+ "dtype": "float16",
704
+ "format": "f32-to-bf16",
705
+ "nbytes": 4096,
706
+ "byteOffset": 30937088
707
+ },
708
+ {
709
+ "name": "model.layers.14.self_attn.c_attn.bias",
710
+ "shape": [
711
+ 6144
712
+ ],
713
+ "dtype": "float16",
714
+ "format": "f32-to-bf16",
715
+ "nbytes": 12288,
716
+ "byteOffset": 30941184
717
+ }
718
+ ],
719
+ "md5sum": "1c4e8ee8dfc04f527e3b04441cee0621"
720
+ },
721
+ {
722
+ "dataPath": "params_shard_23.bin",
723
+ "format": "raw-shard",
724
+ "nbytes": 45088768,
725
+ "records": [
726
+ {
727
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
728
+ "shape": [
729
+ 11008,
730
+ 2048
731
+ ],
732
+ "dtype": "float16",
733
+ "format": "f32-to-bf16",
734
+ "nbytes": 45088768,
735
+ "byteOffset": 0
736
+ }
737
+ ],
738
+ "md5sum": "445a73f350f95f8c6aaf470a0648aa41"
739
+ },
740
+ {
741
+ "dataPath": "params_shard_24.bin",
742
+ "format": "raw-shard",
743
+ "nbytes": 25165824,
744
+ "records": [
745
+ {
746
+ "name": "model.layers.15.self_attn.c_attn.weight",
747
+ "shape": [
748
+ 6144,
749
+ 2048
750
+ ],
751
+ "dtype": "float16",
752
+ "format": "f32-to-bf16",
753
+ "nbytes": 25165824,
754
+ "byteOffset": 0
755
+ }
756
+ ],
757
+ "md5sum": "b5f6ab967a5a9748e5b9ae7a939352bd"
758
+ },
759
+ {
760
+ "dataPath": "params_shard_25.bin",
761
+ "format": "raw-shard",
762
+ "nbytes": 30953472,
763
+ "records": [
764
+ {
765
+ "name": "model.layers.14.self_attn.o_proj.weight",
766
+ "shape": [
767
+ 2048,
768
+ 2048
769
+ ],
770
+ "dtype": "float16",
771
+ "format": "f32-to-bf16",
772
+ "nbytes": 8388608,
773
+ "byteOffset": 0
774
+ },
775
+ {
776
+ "name": "model.layers.15.input_layernorm.weight",
777
+ "shape": [
778
+ 2048
779
+ ],
780
+ "dtype": "float16",
781
+ "format": "f32-to-bf16",
782
+ "nbytes": 4096,
783
+ "byteOffset": 8388608
784
+ },
785
+ {
786
+ "name": "model.layers.15.mlp.down_proj.weight",
787
+ "shape": [
788
+ 2048,
789
+ 5504
790
+ ],
791
+ "dtype": "float16",
792
+ "format": "f32-to-bf16",
793
+ "nbytes": 22544384,
794
+ "byteOffset": 8392704
795
+ },
796
+ {
797
+ "name": "model.layers.15.post_attention_layernorm.weight",
798
+ "shape": [
799
+ 2048
800
+ ],
801
+ "dtype": "float16",
802
+ "format": "f32-to-bf16",
803
+ "nbytes": 4096,
804
+ "byteOffset": 30937088
805
+ },
806
+ {
807
+ "name": "model.layers.15.self_attn.c_attn.bias",
808
+ "shape": [
809
+ 6144
810
+ ],
811
+ "dtype": "float16",
812
+ "format": "f32-to-bf16",
813
+ "nbytes": 12288,
814
+ "byteOffset": 30941184
815
+ }
816
+ ],
817
+ "md5sum": "d7692d14fdec212853e321f5849025fd"
818
+ },
819
+ {
820
+ "dataPath": "params_shard_26.bin",
821
+ "format": "raw-shard",
822
+ "nbytes": 45088768,
823
+ "records": [
824
+ {
825
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
826
+ "shape": [
827
+ 11008,
828
+ 2048
829
+ ],
830
+ "dtype": "float16",
831
+ "format": "f32-to-bf16",
832
+ "nbytes": 45088768,
833
+ "byteOffset": 0
834
+ }
835
+ ],
836
+ "md5sum": "c9d31c64260450a8c520a43ddba6ccda"
837
+ },
838
+ {
839
+ "dataPath": "params_shard_27.bin",
840
+ "format": "raw-shard",
841
+ "nbytes": 25165824,
842
+ "records": [
843
+ {
844
+ "name": "model.layers.16.self_attn.c_attn.weight",
845
+ "shape": [
846
+ 6144,
847
+ 2048
848
+ ],
849
+ "dtype": "float16",
850
+ "format": "f32-to-bf16",
851
+ "nbytes": 25165824,
852
+ "byteOffset": 0
853
+ }
854
+ ],
855
+ "md5sum": "c1404d446281f41b4ab60e586ebe42af"
856
+ },
857
+ {
858
+ "dataPath": "params_shard_28.bin",
859
+ "format": "raw-shard",
860
+ "nbytes": 30953472,
861
+ "records": [
862
+ {
863
+ "name": "model.layers.15.self_attn.o_proj.weight",
864
+ "shape": [
865
+ 2048,
866
+ 2048
867
+ ],
868
+ "dtype": "float16",
869
+ "format": "f32-to-bf16",
870
+ "nbytes": 8388608,
871
+ "byteOffset": 0
872
+ },
873
+ {
874
+ "name": "model.layers.16.input_layernorm.weight",
875
+ "shape": [
876
+ 2048
877
+ ],
878
+ "dtype": "float16",
879
+ "format": "f32-to-bf16",
880
+ "nbytes": 4096,
881
+ "byteOffset": 8388608
882
+ },
883
+ {
884
+ "name": "model.layers.16.mlp.down_proj.weight",
885
+ "shape": [
886
+ 2048,
887
+ 5504
888
+ ],
889
+ "dtype": "float16",
890
+ "format": "f32-to-bf16",
891
+ "nbytes": 22544384,
892
+ "byteOffset": 8392704
893
+ },
894
+ {
895
+ "name": "model.layers.16.post_attention_layernorm.weight",
896
+ "shape": [
897
+ 2048
898
+ ],
899
+ "dtype": "float16",
900
+ "format": "f32-to-bf16",
901
+ "nbytes": 4096,
902
+ "byteOffset": 30937088
903
+ },
904
+ {
905
+ "name": "model.layers.16.self_attn.c_attn.bias",
906
+ "shape": [
907
+ 6144
908
+ ],
909
+ "dtype": "float16",
910
+ "format": "f32-to-bf16",
911
+ "nbytes": 12288,
912
+ "byteOffset": 30941184
913
+ }
914
+ ],
915
+ "md5sum": "33d7615d5499f44e57ec03c95ab2b5d8"
916
+ },
917
+ {
918
+ "dataPath": "params_shard_29.bin",
919
+ "format": "raw-shard",
920
+ "nbytes": 45088768,
921
+ "records": [
922
+ {
923
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
924
+ "shape": [
925
+ 11008,
926
+ 2048
927
+ ],
928
+ "dtype": "float16",
929
+ "format": "f32-to-bf16",
930
+ "nbytes": 45088768,
931
+ "byteOffset": 0
932
+ }
933
+ ],
934
+ "md5sum": "b000699d16122be07193b657fce01e3b"
935
+ },
936
+ {
937
+ "dataPath": "params_shard_30.bin",
938
+ "format": "raw-shard",
939
+ "nbytes": 25165824,
940
+ "records": [
941
+ {
942
+ "name": "model.layers.17.self_attn.c_attn.weight",
943
+ "shape": [
944
+ 6144,
945
+ 2048
946
+ ],
947
+ "dtype": "float16",
948
+ "format": "f32-to-bf16",
949
+ "nbytes": 25165824,
950
+ "byteOffset": 0
951
+ }
952
+ ],
953
+ "md5sum": "7edca338357f18752de1c9a894e7653f"
954
+ },
955
+ {
956
+ "dataPath": "params_shard_31.bin",
957
+ "format": "raw-shard",
958
+ "nbytes": 30953472,
959
+ "records": [
960
+ {
961
+ "name": "model.layers.16.self_attn.o_proj.weight",
962
+ "shape": [
963
+ 2048,
964
+ 2048
965
+ ],
966
+ "dtype": "float16",
967
+ "format": "f32-to-bf16",
968
+ "nbytes": 8388608,
969
+ "byteOffset": 0
970
+ },
971
+ {
972
+ "name": "model.layers.17.input_layernorm.weight",
973
+ "shape": [
974
+ 2048
975
+ ],
976
+ "dtype": "float16",
977
+ "format": "f32-to-bf16",
978
+ "nbytes": 4096,
979
+ "byteOffset": 8388608
980
+ },
981
+ {
982
+ "name": "model.layers.17.mlp.down_proj.weight",
983
+ "shape": [
984
+ 2048,
985
+ 5504
986
+ ],
987
+ "dtype": "float16",
988
+ "format": "f32-to-bf16",
989
+ "nbytes": 22544384,
990
+ "byteOffset": 8392704
991
+ },
992
+ {
993
+ "name": "model.layers.17.post_attention_layernorm.weight",
994
+ "shape": [
995
+ 2048
996
+ ],
997
+ "dtype": "float16",
998
+ "format": "f32-to-bf16",
999
+ "nbytes": 4096,
1000
+ "byteOffset": 30937088
1001
+ },
1002
+ {
1003
+ "name": "model.layers.17.self_attn.c_attn.bias",
1004
+ "shape": [
1005
+ 6144
1006
+ ],
1007
+ "dtype": "float16",
1008
+ "format": "f32-to-bf16",
1009
+ "nbytes": 12288,
1010
+ "byteOffset": 30941184
1011
+ }
1012
+ ],
1013
+ "md5sum": "9854d2759e328a0230b7d8f7623dc54a"
1014
+ },
1015
+ {
1016
+ "dataPath": "params_shard_32.bin",
1017
+ "format": "raw-shard",
1018
+ "nbytes": 45088768,
1019
+ "records": [
1020
+ {
1021
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
1022
+ "shape": [
1023
+ 11008,
1024
+ 2048
1025
+ ],
1026
+ "dtype": "float16",
1027
+ "format": "f32-to-bf16",
1028
+ "nbytes": 45088768,
1029
+ "byteOffset": 0
1030
+ }
1031
+ ],
1032
+ "md5sum": "9c8b9e17393d828088467705cb6f6825"
1033
+ },
1034
+ {
1035
+ "dataPath": "params_shard_33.bin",
1036
+ "format": "raw-shard",
1037
+ "nbytes": 25165824,
1038
+ "records": [
1039
+ {
1040
+ "name": "model.layers.18.self_attn.c_attn.weight",
1041
+ "shape": [
1042
+ 6144,
1043
+ 2048
1044
+ ],
1045
+ "dtype": "float16",
1046
+ "format": "f32-to-bf16",
1047
+ "nbytes": 25165824,
1048
+ "byteOffset": 0
1049
+ }
1050
+ ],
1051
+ "md5sum": "f37dd3700560972a3e624e0ee4beb0f8"
1052
+ },
1053
+ {
1054
+ "dataPath": "params_shard_34.bin",
1055
+ "format": "raw-shard",
1056
+ "nbytes": 30953472,
1057
+ "records": [
1058
+ {
1059
+ "name": "model.layers.17.self_attn.o_proj.weight",
1060
+ "shape": [
1061
+ 2048,
1062
+ 2048
1063
+ ],
1064
+ "dtype": "float16",
1065
+ "format": "f32-to-bf16",
1066
+ "nbytes": 8388608,
1067
+ "byteOffset": 0
1068
+ },
1069
+ {
1070
+ "name": "model.layers.18.input_layernorm.weight",
1071
+ "shape": [
1072
+ 2048
1073
+ ],
1074
+ "dtype": "float16",
1075
+ "format": "f32-to-bf16",
1076
+ "nbytes": 4096,
1077
+ "byteOffset": 8388608
1078
+ },
1079
+ {
1080
+ "name": "model.layers.18.mlp.down_proj.weight",
1081
+ "shape": [
1082
+ 2048,
1083
+ 5504
1084
+ ],
1085
+ "dtype": "float16",
1086
+ "format": "f32-to-bf16",
1087
+ "nbytes": 22544384,
1088
+ "byteOffset": 8392704
1089
+ },
1090
+ {
1091
+ "name": "model.layers.18.post_attention_layernorm.weight",
1092
+ "shape": [
1093
+ 2048
1094
+ ],
1095
+ "dtype": "float16",
1096
+ "format": "f32-to-bf16",
1097
+ "nbytes": 4096,
1098
+ "byteOffset": 30937088
1099
+ },
1100
+ {
1101
+ "name": "model.layers.18.self_attn.c_attn.bias",
1102
+ "shape": [
1103
+ 6144
1104
+ ],
1105
+ "dtype": "float16",
1106
+ "format": "f32-to-bf16",
1107
+ "nbytes": 12288,
1108
+ "byteOffset": 30941184
1109
+ }
1110
+ ],
1111
+ "md5sum": "7a8438b7fdb6b3fee35d8372f1f9df80"
1112
+ },
1113
+ {
1114
+ "dataPath": "params_shard_35.bin",
1115
+ "format": "raw-shard",
1116
+ "nbytes": 45088768,
1117
+ "records": [
1118
+ {
1119
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
1120
+ "shape": [
1121
+ 11008,
1122
+ 2048
1123
+ ],
1124
+ "dtype": "float16",
1125
+ "format": "f32-to-bf16",
1126
+ "nbytes": 45088768,
1127
+ "byteOffset": 0
1128
+ }
1129
+ ],
1130
+ "md5sum": "c376d9da8152e4b47d955418afcfa6f5"
1131
+ },
1132
+ {
1133
+ "dataPath": "params_shard_36.bin",
1134
+ "format": "raw-shard",
1135
+ "nbytes": 25165824,
1136
+ "records": [
1137
+ {
1138
+ "name": "model.layers.19.self_attn.c_attn.weight",
1139
+ "shape": [
1140
+ 6144,
1141
+ 2048
1142
+ ],
1143
+ "dtype": "float16",
1144
+ "format": "f32-to-bf16",
1145
+ "nbytes": 25165824,
1146
+ "byteOffset": 0
1147
+ }
1148
+ ],
1149
+ "md5sum": "bdc65263820cdccb8af74c212d500cf4"
1150
+ },
1151
+ {
1152
+ "dataPath": "params_shard_37.bin",
1153
+ "format": "raw-shard",
1154
+ "nbytes": 30953472,
1155
+ "records": [
1156
+ {
1157
+ "name": "model.layers.18.self_attn.o_proj.weight",
1158
+ "shape": [
1159
+ 2048,
1160
+ 2048
1161
+ ],
1162
+ "dtype": "float16",
1163
+ "format": "f32-to-bf16",
1164
+ "nbytes": 8388608,
1165
+ "byteOffset": 0
1166
+ },
1167
+ {
1168
+ "name": "model.layers.19.input_layernorm.weight",
1169
+ "shape": [
1170
+ 2048
1171
+ ],
1172
+ "dtype": "float16",
1173
+ "format": "f32-to-bf16",
1174
+ "nbytes": 4096,
1175
+ "byteOffset": 8388608
1176
+ },
1177
+ {
1178
+ "name": "model.layers.19.mlp.down_proj.weight",
1179
+ "shape": [
1180
+ 2048,
1181
+ 5504
1182
+ ],
1183
+ "dtype": "float16",
1184
+ "format": "f32-to-bf16",
1185
+ "nbytes": 22544384,
1186
+ "byteOffset": 8392704
1187
+ },
1188
+ {
1189
+ "name": "model.layers.19.post_attention_layernorm.weight",
1190
+ "shape": [
1191
+ 2048
1192
+ ],
1193
+ "dtype": "float16",
1194
+ "format": "f32-to-bf16",
1195
+ "nbytes": 4096,
1196
+ "byteOffset": 30937088
1197
+ },
1198
+ {
1199
+ "name": "model.layers.19.self_attn.c_attn.bias",
1200
+ "shape": [
1201
+ 6144
1202
+ ],
1203
+ "dtype": "float16",
1204
+ "format": "f32-to-bf16",
1205
+ "nbytes": 12288,
1206
+ "byteOffset": 30941184
1207
+ }
1208
+ ],
1209
+ "md5sum": "17316b02f50d215eee2c183605a28455"
1210
+ },
1211
+ {
1212
+ "dataPath": "params_shard_38.bin",
1213
+ "format": "raw-shard",
1214
+ "nbytes": 45088768,
1215
+ "records": [
1216
+ {
1217
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
1218
+ "shape": [
1219
+ 11008,
1220
+ 2048
1221
+ ],
1222
+ "dtype": "float16",
1223
+ "format": "f32-to-bf16",
1224
+ "nbytes": 45088768,
1225
+ "byteOffset": 0
1226
+ }
1227
+ ],
1228
+ "md5sum": "a6739cd336bcfca9fcf694f4c859e785"
1229
+ },
1230
+ {
1231
+ "dataPath": "params_shard_39.bin",
1232
+ "format": "raw-shard",
1233
+ "nbytes": 25165824,
1234
+ "records": [
1235
+ {
1236
+ "name": "model.layers.2.self_attn.c_attn.weight",
1237
+ "shape": [
1238
+ 6144,
1239
+ 2048
1240
+ ],
1241
+ "dtype": "float16",
1242
+ "format": "f32-to-bf16",
1243
+ "nbytes": 25165824,
1244
+ "byteOffset": 0
1245
+ }
1246
+ ],
1247
+ "md5sum": "52be86752d8b85e4eab1baf5ff2dc06a"
1248
+ },
1249
+ {
1250
+ "dataPath": "params_shard_40.bin",
1251
+ "format": "raw-shard",
1252
+ "nbytes": 30953472,
1253
+ "records": [
1254
+ {
1255
+ "name": "model.layers.19.self_attn.o_proj.weight",
1256
+ "shape": [
1257
+ 2048,
1258
+ 2048
1259
+ ],
1260
+ "dtype": "float16",
1261
+ "format": "f32-to-bf16",
1262
+ "nbytes": 8388608,
1263
+ "byteOffset": 0
1264
+ },
1265
+ {
1266
+ "name": "model.layers.2.input_layernorm.weight",
1267
+ "shape": [
1268
+ 2048
1269
+ ],
1270
+ "dtype": "float16",
1271
+ "format": "f32-to-bf16",
1272
+ "nbytes": 4096,
1273
+ "byteOffset": 8388608
1274
+ },
1275
+ {
1276
+ "name": "model.layers.2.mlp.down_proj.weight",
1277
+ "shape": [
1278
+ 2048,
1279
+ 5504
1280
+ ],
1281
+ "dtype": "float16",
1282
+ "format": "f32-to-bf16",
1283
+ "nbytes": 22544384,
1284
+ "byteOffset": 8392704
1285
+ },
1286
+ {
1287
+ "name": "model.layers.2.post_attention_layernorm.weight",
1288
+ "shape": [
1289
+ 2048
1290
+ ],
1291
+ "dtype": "float16",
1292
+ "format": "f32-to-bf16",
1293
+ "nbytes": 4096,
1294
+ "byteOffset": 30937088
1295
+ },
1296
+ {
1297
+ "name": "model.layers.2.self_attn.c_attn.bias",
1298
+ "shape": [
1299
+ 6144
1300
+ ],
1301
+ "dtype": "float16",
1302
+ "format": "f32-to-bf16",
1303
+ "nbytes": 12288,
1304
+ "byteOffset": 30941184
1305
+ }
1306
+ ],
1307
+ "md5sum": "45412cba0d7d92875a4b66dceaaee000"
1308
+ },
1309
+ {
1310
+ "dataPath": "params_shard_41.bin",
1311
+ "format": "raw-shard",
1312
+ "nbytes": 45088768,
1313
+ "records": [
1314
+ {
1315
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1316
+ "shape": [
1317
+ 11008,
1318
+ 2048
1319
+ ],
1320
+ "dtype": "float16",
1321
+ "format": "f32-to-bf16",
1322
+ "nbytes": 45088768,
1323
+ "byteOffset": 0
1324
+ }
1325
+ ],
1326
+ "md5sum": "f376a2446673e2884216fb480defe9a4"
1327
+ },
1328
+ {
1329
+ "dataPath": "params_shard_42.bin",
1330
+ "format": "raw-shard",
1331
+ "nbytes": 25165824,
1332
+ "records": [
1333
+ {
1334
+ "name": "model.layers.20.self_attn.c_attn.weight",
1335
+ "shape": [
1336
+ 6144,
1337
+ 2048
1338
+ ],
1339
+ "dtype": "float16",
1340
+ "format": "f32-to-bf16",
1341
+ "nbytes": 25165824,
1342
+ "byteOffset": 0
1343
+ }
1344
+ ],
1345
+ "md5sum": "58e5c1da262bdbc658646cdb25586e50"
1346
+ },
1347
+ {
1348
+ "dataPath": "params_shard_43.bin",
1349
+ "format": "raw-shard",
1350
+ "nbytes": 30953472,
1351
+ "records": [
1352
+ {
1353
+ "name": "model.layers.2.self_attn.o_proj.weight",
1354
+ "shape": [
1355
+ 2048,
1356
+ 2048
1357
+ ],
1358
+ "dtype": "float16",
1359
+ "format": "f32-to-bf16",
1360
+ "nbytes": 8388608,
1361
+ "byteOffset": 0
1362
+ },
1363
+ {
1364
+ "name": "model.layers.20.input_layernorm.weight",
1365
+ "shape": [
1366
+ 2048
1367
+ ],
1368
+ "dtype": "float16",
1369
+ "format": "f32-to-bf16",
1370
+ "nbytes": 4096,
1371
+ "byteOffset": 8388608
1372
+ },
1373
+ {
1374
+ "name": "model.layers.20.mlp.down_proj.weight",
1375
+ "shape": [
1376
+ 2048,
1377
+ 5504
1378
+ ],
1379
+ "dtype": "float16",
1380
+ "format": "f32-to-bf16",
1381
+ "nbytes": 22544384,
1382
+ "byteOffset": 8392704
1383
+ },
1384
+ {
1385
+ "name": "model.layers.20.post_attention_layernorm.weight",
1386
+ "shape": [
1387
+ 2048
1388
+ ],
1389
+ "dtype": "float16",
1390
+ "format": "f32-to-bf16",
1391
+ "nbytes": 4096,
1392
+ "byteOffset": 30937088
1393
+ },
1394
+ {
1395
+ "name": "model.layers.20.self_attn.c_attn.bias",
1396
+ "shape": [
1397
+ 6144
1398
+ ],
1399
+ "dtype": "float16",
1400
+ "format": "f32-to-bf16",
1401
+ "nbytes": 12288,
1402
+ "byteOffset": 30941184
1403
+ }
1404
+ ],
1405
+ "md5sum": "9cb5fb5ca40f3c44b0892935edb590c8"
1406
+ },
1407
+ {
1408
+ "dataPath": "params_shard_44.bin",
1409
+ "format": "raw-shard",
1410
+ "nbytes": 45088768,
1411
+ "records": [
1412
+ {
1413
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1414
+ "shape": [
1415
+ 11008,
1416
+ 2048
1417
+ ],
1418
+ "dtype": "float16",
1419
+ "format": "f32-to-bf16",
1420
+ "nbytes": 45088768,
1421
+ "byteOffset": 0
1422
+ }
1423
+ ],
1424
+ "md5sum": "e8dc7a080286b770f450c7b8b9233d31"
1425
+ },
1426
+ {
1427
+ "dataPath": "params_shard_45.bin",
1428
+ "format": "raw-shard",
1429
+ "nbytes": 25165824,
1430
+ "records": [
1431
+ {
1432
+ "name": "model.layers.21.self_attn.c_attn.weight",
1433
+ "shape": [
1434
+ 6144,
1435
+ 2048
1436
+ ],
1437
+ "dtype": "float16",
1438
+ "format": "f32-to-bf16",
1439
+ "nbytes": 25165824,
1440
+ "byteOffset": 0
1441
+ }
1442
+ ],
1443
+ "md5sum": "c3a070435a1738702f4fa97639b1295f"
1444
+ },
1445
+ {
1446
+ "dataPath": "params_shard_46.bin",
1447
+ "format": "raw-shard",
1448
+ "nbytes": 30953472,
1449
+ "records": [
1450
+ {
1451
+ "name": "model.layers.20.self_attn.o_proj.weight",
1452
+ "shape": [
1453
+ 2048,
1454
+ 2048
1455
+ ],
1456
+ "dtype": "float16",
1457
+ "format": "f32-to-bf16",
1458
+ "nbytes": 8388608,
1459
+ "byteOffset": 0
1460
+ },
1461
+ {
1462
+ "name": "model.layers.21.input_layernorm.weight",
1463
+ "shape": [
1464
+ 2048
1465
+ ],
1466
+ "dtype": "float16",
1467
+ "format": "f32-to-bf16",
1468
+ "nbytes": 4096,
1469
+ "byteOffset": 8388608
1470
+ },
1471
+ {
1472
+ "name": "model.layers.21.mlp.down_proj.weight",
1473
+ "shape": [
1474
+ 2048,
1475
+ 5504
1476
+ ],
1477
+ "dtype": "float16",
1478
+ "format": "f32-to-bf16",
1479
+ "nbytes": 22544384,
1480
+ "byteOffset": 8392704
1481
+ },
1482
+ {
1483
+ "name": "model.layers.21.post_attention_layernorm.weight",
1484
+ "shape": [
1485
+ 2048
1486
+ ],
1487
+ "dtype": "float16",
1488
+ "format": "f32-to-bf16",
1489
+ "nbytes": 4096,
1490
+ "byteOffset": 30937088
1491
+ },
1492
+ {
1493
+ "name": "model.layers.21.self_attn.c_attn.bias",
1494
+ "shape": [
1495
+ 6144
1496
+ ],
1497
+ "dtype": "float16",
1498
+ "format": "f32-to-bf16",
1499
+ "nbytes": 12288,
1500
+ "byteOffset": 30941184
1501
+ }
1502
+ ],
1503
+ "md5sum": "dc5afcb289c1193402fc5a0d3ebe3b1d"
1504
+ },
1505
+ {
1506
+ "dataPath": "params_shard_47.bin",
1507
+ "format": "raw-shard",
1508
+ "nbytes": 45088768,
1509
+ "records": [
1510
+ {
1511
+ "name": "model.layers.22.mlp.gate_up_proj.weight",
1512
+ "shape": [
1513
+ 11008,
1514
+ 2048
1515
+ ],
1516
+ "dtype": "float16",
1517
+ "format": "f32-to-bf16",
1518
+ "nbytes": 45088768,
1519
+ "byteOffset": 0
1520
+ }
1521
+ ],
1522
+ "md5sum": "1e6b00aec599d853df3baa265d4a1690"
1523
+ },
1524
+ {
1525
+ "dataPath": "params_shard_48.bin",
1526
+ "format": "raw-shard",
1527
+ "nbytes": 25165824,
1528
+ "records": [
1529
+ {
1530
+ "name": "model.layers.22.self_attn.c_attn.weight",
1531
+ "shape": [
1532
+ 6144,
1533
+ 2048
1534
+ ],
1535
+ "dtype": "float16",
1536
+ "format": "f32-to-bf16",
1537
+ "nbytes": 25165824,
1538
+ "byteOffset": 0
1539
+ }
1540
+ ],
1541
+ "md5sum": "bdb912d45a0626cb91a0da0abbfb84a8"
1542
+ },
1543
+ {
1544
+ "dataPath": "params_shard_49.bin",
1545
+ "format": "raw-shard",
1546
+ "nbytes": 30953472,
1547
+ "records": [
1548
+ {
1549
+ "name": "model.layers.21.self_attn.o_proj.weight",
1550
+ "shape": [
1551
+ 2048,
1552
+ 2048
1553
+ ],
1554
+ "dtype": "float16",
1555
+ "format": "f32-to-bf16",
1556
+ "nbytes": 8388608,
1557
+ "byteOffset": 0
1558
+ },
1559
+ {
1560
+ "name": "model.layers.22.input_layernorm.weight",
1561
+ "shape": [
1562
+ 2048
1563
+ ],
1564
+ "dtype": "float16",
1565
+ "format": "f32-to-bf16",
1566
+ "nbytes": 4096,
1567
+ "byteOffset": 8388608
1568
+ },
1569
+ {
1570
+ "name": "model.layers.22.mlp.down_proj.weight",
1571
+ "shape": [
1572
+ 2048,
1573
+ 5504
1574
+ ],
1575
+ "dtype": "float16",
1576
+ "format": "f32-to-bf16",
1577
+ "nbytes": 22544384,
1578
+ "byteOffset": 8392704
1579
+ },
1580
+ {
1581
+ "name": "model.layers.22.post_attention_layernorm.weight",
1582
+ "shape": [
1583
+ 2048
1584
+ ],
1585
+ "dtype": "float16",
1586
+ "format": "f32-to-bf16",
1587
+ "nbytes": 4096,
1588
+ "byteOffset": 30937088
1589
+ },
1590
+ {
1591
+ "name": "model.layers.22.self_attn.c_attn.bias",
1592
+ "shape": [
1593
+ 6144
1594
+ ],
1595
+ "dtype": "float16",
1596
+ "format": "f32-to-bf16",
1597
+ "nbytes": 12288,
1598
+ "byteOffset": 30941184
1599
+ }
1600
+ ],
1601
+ "md5sum": "de46fd346a15c41b42e67b3d0ae673c9"
1602
+ },
1603
+ {
1604
+ "dataPath": "params_shard_50.bin",
1605
+ "format": "raw-shard",
1606
+ "nbytes": 45088768,
1607
+ "records": [
1608
+ {
1609
+ "name": "model.layers.23.mlp.gate_up_proj.weight",
1610
+ "shape": [
1611
+ 11008,
1612
+ 2048
1613
+ ],
1614
+ "dtype": "float16",
1615
+ "format": "f32-to-bf16",
1616
+ "nbytes": 45088768,
1617
+ "byteOffset": 0
1618
+ }
1619
+ ],
1620
+ "md5sum": "4cb92d3031b8a8d7e4944d6d6d26af3b"
1621
+ },
1622
+ {
1623
+ "dataPath": "params_shard_51.bin",
1624
+ "format": "raw-shard",
1625
+ "nbytes": 25165824,
1626
+ "records": [
1627
+ {
1628
+ "name": "model.layers.23.self_attn.c_attn.weight",
1629
+ "shape": [
1630
+ 6144,
1631
+ 2048
1632
+ ],
1633
+ "dtype": "float16",
1634
+ "format": "f32-to-bf16",
1635
+ "nbytes": 25165824,
1636
+ "byteOffset": 0
1637
+ }
1638
+ ],
1639
+ "md5sum": "8b7e38cd6f60865a1db8034d27eecaf0"
1640
+ },
1641
+ {
1642
+ "dataPath": "params_shard_52.bin",
1643
+ "format": "raw-shard",
1644
+ "nbytes": 30953472,
1645
+ "records": [
1646
+ {
1647
+ "name": "model.layers.22.self_attn.o_proj.weight",
1648
+ "shape": [
1649
+ 2048,
1650
+ 2048
1651
+ ],
1652
+ "dtype": "float16",
1653
+ "format": "f32-to-bf16",
1654
+ "nbytes": 8388608,
1655
+ "byteOffset": 0
1656
+ },
1657
+ {
1658
+ "name": "model.layers.23.input_layernorm.weight",
1659
+ "shape": [
1660
+ 2048
1661
+ ],
1662
+ "dtype": "float16",
1663
+ "format": "f32-to-bf16",
1664
+ "nbytes": 4096,
1665
+ "byteOffset": 8388608
1666
+ },
1667
+ {
1668
+ "name": "model.layers.23.mlp.down_proj.weight",
1669
+ "shape": [
1670
+ 2048,
1671
+ 5504
1672
+ ],
1673
+ "dtype": "float16",
1674
+ "format": "f32-to-bf16",
1675
+ "nbytes": 22544384,
1676
+ "byteOffset": 8392704
1677
+ },
1678
+ {
1679
+ "name": "model.layers.23.post_attention_layernorm.weight",
1680
+ "shape": [
1681
+ 2048
1682
+ ],
1683
+ "dtype": "float16",
1684
+ "format": "f32-to-bf16",
1685
+ "nbytes": 4096,
1686
+ "byteOffset": 30937088
1687
+ },
1688
+ {
1689
+ "name": "model.layers.23.self_attn.c_attn.bias",
1690
+ "shape": [
1691
+ 6144
1692
+ ],
1693
+ "dtype": "float16",
1694
+ "format": "f32-to-bf16",
1695
+ "nbytes": 12288,
1696
+ "byteOffset": 30941184
1697
+ }
1698
+ ],
1699
+ "md5sum": "8f04f524cd1d8946116a5751ebfbc85f"
1700
+ },
1701
+ {
1702
+ "dataPath": "params_shard_53.bin",
1703
+ "format": "raw-shard",
1704
+ "nbytes": 45088768,
1705
+ "records": [
1706
+ {
1707
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
1708
+ "shape": [
1709
+ 11008,
1710
+ 2048
1711
+ ],
1712
+ "dtype": "float16",
1713
+ "format": "f32-to-bf16",
1714
+ "nbytes": 45088768,
1715
+ "byteOffset": 0
1716
+ }
1717
+ ],
1718
+ "md5sum": "401108ec17a0721237c7893d86405968"
1719
+ },
1720
+ {
1721
+ "dataPath": "params_shard_54.bin",
1722
+ "format": "raw-shard",
1723
+ "nbytes": 25165824,
1724
+ "records": [
1725
+ {
1726
+ "name": "model.layers.3.self_attn.c_attn.weight",
1727
+ "shape": [
1728
+ 6144,
1729
+ 2048
1730
+ ],
1731
+ "dtype": "float16",
1732
+ "format": "f32-to-bf16",
1733
+ "nbytes": 25165824,
1734
+ "byteOffset": 0
1735
+ }
1736
+ ],
1737
+ "md5sum": "131f98baea5d8cb84ede2ad0bdad81e4"
1738
+ },
1739
+ {
1740
+ "dataPath": "params_shard_55.bin",
1741
+ "format": "raw-shard",
1742
+ "nbytes": 30953472,
1743
+ "records": [
1744
+ {
1745
+ "name": "model.layers.23.self_attn.o_proj.weight",
1746
+ "shape": [
1747
+ 2048,
1748
+ 2048
1749
+ ],
1750
+ "dtype": "float16",
1751
+ "format": "f32-to-bf16",
1752
+ "nbytes": 8388608,
1753
+ "byteOffset": 0
1754
+ },
1755
+ {
1756
+ "name": "model.layers.3.input_layernorm.weight",
1757
+ "shape": [
1758
+ 2048
1759
+ ],
1760
+ "dtype": "float16",
1761
+ "format": "f32-to-bf16",
1762
+ "nbytes": 4096,
1763
+ "byteOffset": 8388608
1764
+ },
1765
+ {
1766
+ "name": "model.layers.3.mlp.down_proj.weight",
1767
+ "shape": [
1768
+ 2048,
1769
+ 5504
1770
+ ],
1771
+ "dtype": "float16",
1772
+ "format": "f32-to-bf16",
1773
+ "nbytes": 22544384,
1774
+ "byteOffset": 8392704
1775
+ },
1776
+ {
1777
+ "name": "model.layers.3.post_attention_layernorm.weight",
1778
+ "shape": [
1779
+ 2048
1780
+ ],
1781
+ "dtype": "float16",
1782
+ "format": "f32-to-bf16",
1783
+ "nbytes": 4096,
1784
+ "byteOffset": 30937088
1785
+ },
1786
+ {
1787
+ "name": "model.layers.3.self_attn.c_attn.bias",
1788
+ "shape": [
1789
+ 6144
1790
+ ],
1791
+ "dtype": "float16",
1792
+ "format": "f32-to-bf16",
1793
+ "nbytes": 12288,
1794
+ "byteOffset": 30941184
1795
+ }
1796
+ ],
1797
+ "md5sum": "8c25341395fac238ab0fe7eb58432000"
1798
+ },
1799
+ {
1800
+ "dataPath": "params_shard_56.bin",
1801
+ "format": "raw-shard",
1802
+ "nbytes": 45088768,
1803
+ "records": [
1804
+ {
1805
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
1806
+ "shape": [
1807
+ 11008,
1808
+ 2048
1809
+ ],
1810
+ "dtype": "float16",
1811
+ "format": "f32-to-bf16",
1812
+ "nbytes": 45088768,
1813
+ "byteOffset": 0
1814
+ }
1815
+ ],
1816
+ "md5sum": "1b4069779763ce8e9ded3be2093aec82"
1817
+ },
1818
+ {
1819
+ "dataPath": "params_shard_57.bin",
1820
+ "format": "raw-shard",
1821
+ "nbytes": 25165824,
1822
+ "records": [
1823
+ {
1824
+ "name": "model.layers.4.self_attn.c_attn.weight",
1825
+ "shape": [
1826
+ 6144,
1827
+ 2048
1828
+ ],
1829
+ "dtype": "float16",
1830
+ "format": "f32-to-bf16",
1831
+ "nbytes": 25165824,
1832
+ "byteOffset": 0
1833
+ }
1834
+ ],
1835
+ "md5sum": "b7202732924be475d50445bd09148ebc"
1836
+ },
1837
+ {
1838
+ "dataPath": "params_shard_58.bin",
1839
+ "format": "raw-shard",
1840
+ "nbytes": 30953472,
1841
+ "records": [
1842
+ {
1843
+ "name": "model.layers.3.self_attn.o_proj.weight",
1844
+ "shape": [
1845
+ 2048,
1846
+ 2048
1847
+ ],
1848
+ "dtype": "float16",
1849
+ "format": "f32-to-bf16",
1850
+ "nbytes": 8388608,
1851
+ "byteOffset": 0
1852
+ },
1853
+ {
1854
+ "name": "model.layers.4.input_layernorm.weight",
1855
+ "shape": [
1856
+ 2048
1857
+ ],
1858
+ "dtype": "float16",
1859
+ "format": "f32-to-bf16",
1860
+ "nbytes": 4096,
1861
+ "byteOffset": 8388608
1862
+ },
1863
+ {
1864
+ "name": "model.layers.4.mlp.down_proj.weight",
1865
+ "shape": [
1866
+ 2048,
1867
+ 5504
1868
+ ],
1869
+ "dtype": "float16",
1870
+ "format": "f32-to-bf16",
1871
+ "nbytes": 22544384,
1872
+ "byteOffset": 8392704
1873
+ },
1874
+ {
1875
+ "name": "model.layers.4.post_attention_layernorm.weight",
1876
+ "shape": [
1877
+ 2048
1878
+ ],
1879
+ "dtype": "float16",
1880
+ "format": "f32-to-bf16",
1881
+ "nbytes": 4096,
1882
+ "byteOffset": 30937088
1883
+ },
1884
+ {
1885
+ "name": "model.layers.4.self_attn.c_attn.bias",
1886
+ "shape": [
1887
+ 6144
1888
+ ],
1889
+ "dtype": "float16",
1890
+ "format": "f32-to-bf16",
1891
+ "nbytes": 12288,
1892
+ "byteOffset": 30941184
1893
+ }
1894
+ ],
1895
+ "md5sum": "87d54674433056fb7111710ce9ee1da1"
1896
+ },
1897
+ {
1898
+ "dataPath": "params_shard_59.bin",
1899
+ "format": "raw-shard",
1900
+ "nbytes": 45088768,
1901
+ "records": [
1902
+ {
1903
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
1904
+ "shape": [
1905
+ 11008,
1906
+ 2048
1907
+ ],
1908
+ "dtype": "float16",
1909
+ "format": "f32-to-bf16",
1910
+ "nbytes": 45088768,
1911
+ "byteOffset": 0
1912
+ }
1913
+ ],
1914
+ "md5sum": "e6d8a89c39719c8894be8677b84c34bd"
1915
+ },
1916
+ {
1917
+ "dataPath": "params_shard_60.bin",
1918
+ "format": "raw-shard",
1919
+ "nbytes": 25165824,
1920
+ "records": [
1921
+ {
1922
+ "name": "model.layers.5.self_attn.c_attn.weight",
1923
+ "shape": [
1924
+ 6144,
1925
+ 2048
1926
+ ],
1927
+ "dtype": "float16",
1928
+ "format": "f32-to-bf16",
1929
+ "nbytes": 25165824,
1930
+ "byteOffset": 0
1931
+ }
1932
+ ],
1933
+ "md5sum": "914748c73b153225064d5473a71bf480"
1934
+ },
1935
+ {
1936
+ "dataPath": "params_shard_61.bin",
1937
+ "format": "raw-shard",
1938
+ "nbytes": 30953472,
1939
+ "records": [
1940
+ {
1941
+ "name": "model.layers.4.self_attn.o_proj.weight",
1942
+ "shape": [
1943
+ 2048,
1944
+ 2048
1945
+ ],
1946
+ "dtype": "float16",
1947
+ "format": "f32-to-bf16",
1948
+ "nbytes": 8388608,
1949
+ "byteOffset": 0
1950
+ },
1951
+ {
1952
+ "name": "model.layers.5.input_layernorm.weight",
1953
+ "shape": [
1954
+ 2048
1955
+ ],
1956
+ "dtype": "float16",
1957
+ "format": "f32-to-bf16",
1958
+ "nbytes": 4096,
1959
+ "byteOffset": 8388608
1960
+ },
1961
+ {
1962
+ "name": "model.layers.5.mlp.down_proj.weight",
1963
+ "shape": [
1964
+ 2048,
1965
+ 5504
1966
+ ],
1967
+ "dtype": "float16",
1968
+ "format": "f32-to-bf16",
1969
+ "nbytes": 22544384,
1970
+ "byteOffset": 8392704
1971
+ },
1972
+ {
1973
+ "name": "model.layers.5.post_attention_layernorm.weight",
1974
+ "shape": [
1975
+ 2048
1976
+ ],
1977
+ "dtype": "float16",
1978
+ "format": "f32-to-bf16",
1979
+ "nbytes": 4096,
1980
+ "byteOffset": 30937088
1981
+ },
1982
+ {
1983
+ "name": "model.layers.5.self_attn.c_attn.bias",
1984
+ "shape": [
1985
+ 6144
1986
+ ],
1987
+ "dtype": "float16",
1988
+ "format": "f32-to-bf16",
1989
+ "nbytes": 12288,
1990
+ "byteOffset": 30941184
1991
+ }
1992
+ ],
1993
+ "md5sum": "194c16b3e951936f2662d8060d5f7163"
1994
+ },
1995
+ {
1996
+ "dataPath": "params_shard_62.bin",
1997
+ "format": "raw-shard",
1998
+ "nbytes": 45088768,
1999
+ "records": [
2000
+ {
2001
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
2002
+ "shape": [
2003
+ 11008,
2004
+ 2048
2005
+ ],
2006
+ "dtype": "float16",
2007
+ "format": "f32-to-bf16",
2008
+ "nbytes": 45088768,
2009
+ "byteOffset": 0
2010
+ }
2011
+ ],
2012
+ "md5sum": "0d2929123d475dddbf5348e7c3bca32e"
2013
+ },
2014
+ {
2015
+ "dataPath": "params_shard_63.bin",
2016
+ "format": "raw-shard",
2017
+ "nbytes": 25165824,
2018
+ "records": [
2019
+ {
2020
+ "name": "model.layers.6.self_attn.c_attn.weight",
2021
+ "shape": [
2022
+ 6144,
2023
+ 2048
2024
+ ],
2025
+ "dtype": "float16",
2026
+ "format": "f32-to-bf16",
2027
+ "nbytes": 25165824,
2028
+ "byteOffset": 0
2029
+ }
2030
+ ],
2031
+ "md5sum": "4c16d2f3f5523fedb224ee355179c4bc"
2032
+ },
2033
+ {
2034
+ "dataPath": "params_shard_64.bin",
2035
+ "format": "raw-shard",
2036
+ "nbytes": 30953472,
2037
+ "records": [
2038
+ {
2039
+ "name": "model.layers.5.self_attn.o_proj.weight",
2040
+ "shape": [
2041
+ 2048,
2042
+ 2048
2043
+ ],
2044
+ "dtype": "float16",
2045
+ "format": "f32-to-bf16",
2046
+ "nbytes": 8388608,
2047
+ "byteOffset": 0
2048
+ },
2049
+ {
2050
+ "name": "model.layers.6.input_layernorm.weight",
2051
+ "shape": [
2052
+ 2048
2053
+ ],
2054
+ "dtype": "float16",
2055
+ "format": "f32-to-bf16",
2056
+ "nbytes": 4096,
2057
+ "byteOffset": 8388608
2058
+ },
2059
+ {
2060
+ "name": "model.layers.6.mlp.down_proj.weight",
2061
+ "shape": [
2062
+ 2048,
2063
+ 5504
2064
+ ],
2065
+ "dtype": "float16",
2066
+ "format": "f32-to-bf16",
2067
+ "nbytes": 22544384,
2068
+ "byteOffset": 8392704
2069
+ },
2070
+ {
2071
+ "name": "model.layers.6.post_attention_layernorm.weight",
2072
+ "shape": [
2073
+ 2048
2074
+ ],
2075
+ "dtype": "float16",
2076
+ "format": "f32-to-bf16",
2077
+ "nbytes": 4096,
2078
+ "byteOffset": 30937088
2079
+ },
2080
+ {
2081
+ "name": "model.layers.6.self_attn.c_attn.bias",
2082
+ "shape": [
2083
+ 6144
2084
+ ],
2085
+ "dtype": "float16",
2086
+ "format": "f32-to-bf16",
2087
+ "nbytes": 12288,
2088
+ "byteOffset": 30941184
2089
+ }
2090
+ ],
2091
+ "md5sum": "a4ae8bed4420f2ccc51bd6d26b80d703"
2092
+ },
2093
+ {
2094
+ "dataPath": "params_shard_65.bin",
2095
+ "format": "raw-shard",
2096
+ "nbytes": 45088768,
2097
+ "records": [
2098
+ {
2099
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
2100
+ "shape": [
2101
+ 11008,
2102
+ 2048
2103
+ ],
2104
+ "dtype": "float16",
2105
+ "format": "f32-to-bf16",
2106
+ "nbytes": 45088768,
2107
+ "byteOffset": 0
2108
+ }
2109
+ ],
2110
+ "md5sum": "ac79b2cd07190cc8f27f6d39447d8008"
2111
+ },
2112
+ {
2113
+ "dataPath": "params_shard_66.bin",
2114
+ "format": "raw-shard",
2115
+ "nbytes": 25165824,
2116
+ "records": [
2117
+ {
2118
+ "name": "model.layers.7.self_attn.c_attn.weight",
2119
+ "shape": [
2120
+ 6144,
2121
+ 2048
2122
+ ],
2123
+ "dtype": "float16",
2124
+ "format": "f32-to-bf16",
2125
+ "nbytes": 25165824,
2126
+ "byteOffset": 0
2127
+ }
2128
+ ],
2129
+ "md5sum": "9c74c6e83c316439766dbabd9b4bfe61"
2130
+ },
2131
+ {
2132
+ "dataPath": "params_shard_67.bin",
2133
+ "format": "raw-shard",
2134
+ "nbytes": 30953472,
2135
+ "records": [
2136
+ {
2137
+ "name": "model.layers.6.self_attn.o_proj.weight",
2138
+ "shape": [
2139
+ 2048,
2140
+ 2048
2141
+ ],
2142
+ "dtype": "float16",
2143
+ "format": "f32-to-bf16",
2144
+ "nbytes": 8388608,
2145
+ "byteOffset": 0
2146
+ },
2147
+ {
2148
+ "name": "model.layers.7.input_layernorm.weight",
2149
+ "shape": [
2150
+ 2048
2151
+ ],
2152
+ "dtype": "float16",
2153
+ "format": "f32-to-bf16",
2154
+ "nbytes": 4096,
2155
+ "byteOffset": 8388608
2156
+ },
2157
+ {
2158
+ "name": "model.layers.7.mlp.down_proj.weight",
2159
+ "shape": [
2160
+ 2048,
2161
+ 5504
2162
+ ],
2163
+ "dtype": "float16",
2164
+ "format": "f32-to-bf16",
2165
+ "nbytes": 22544384,
2166
+ "byteOffset": 8392704
2167
+ },
2168
+ {
2169
+ "name": "model.layers.7.post_attention_layernorm.weight",
2170
+ "shape": [
2171
+ 2048
2172
+ ],
2173
+ "dtype": "float16",
2174
+ "format": "f32-to-bf16",
2175
+ "nbytes": 4096,
2176
+ "byteOffset": 30937088
2177
+ },
2178
+ {
2179
+ "name": "model.layers.7.self_attn.c_attn.bias",
2180
+ "shape": [
2181
+ 6144
2182
+ ],
2183
+ "dtype": "float16",
2184
+ "format": "f32-to-bf16",
2185
+ "nbytes": 12288,
2186
+ "byteOffset": 30941184
2187
+ }
2188
+ ],
2189
+ "md5sum": "189cf5ee330a2d298e3bf44a5d79c926"
2190
+ },
2191
+ {
2192
+ "dataPath": "params_shard_68.bin",
2193
+ "format": "raw-shard",
2194
+ "nbytes": 45088768,
2195
+ "records": [
2196
+ {
2197
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
2198
+ "shape": [
2199
+ 11008,
2200
+ 2048
2201
+ ],
2202
+ "dtype": "float16",
2203
+ "format": "f32-to-bf16",
2204
+ "nbytes": 45088768,
2205
+ "byteOffset": 0
2206
+ }
2207
+ ],
2208
+ "md5sum": "f82bde1caaf69d79b46d16c9f3630470"
2209
+ },
2210
+ {
2211
+ "dataPath": "params_shard_69.bin",
2212
+ "format": "raw-shard",
2213
+ "nbytes": 25165824,
2214
+ "records": [
2215
+ {
2216
+ "name": "model.layers.8.self_attn.c_attn.weight",
2217
+ "shape": [
2218
+ 6144,
2219
+ 2048
2220
+ ],
2221
+ "dtype": "float16",
2222
+ "format": "f32-to-bf16",
2223
+ "nbytes": 25165824,
2224
+ "byteOffset": 0
2225
+ }
2226
+ ],
2227
+ "md5sum": "04b1b10c63c030e8b4461af7528fddba"
2228
+ },
2229
+ {
2230
+ "dataPath": "params_shard_70.bin",
2231
+ "format": "raw-shard",
2232
+ "nbytes": 30953472,
2233
+ "records": [
2234
+ {
2235
+ "name": "model.layers.7.self_attn.o_proj.weight",
2236
+ "shape": [
2237
+ 2048,
2238
+ 2048
2239
+ ],
2240
+ "dtype": "float16",
2241
+ "format": "f32-to-bf16",
2242
+ "nbytes": 8388608,
2243
+ "byteOffset": 0
2244
+ },
2245
+ {
2246
+ "name": "model.layers.8.input_layernorm.weight",
2247
+ "shape": [
2248
+ 2048
2249
+ ],
2250
+ "dtype": "float16",
2251
+ "format": "f32-to-bf16",
2252
+ "nbytes": 4096,
2253
+ "byteOffset": 8388608
2254
+ },
2255
+ {
2256
+ "name": "model.layers.8.mlp.down_proj.weight",
2257
+ "shape": [
2258
+ 2048,
2259
+ 5504
2260
+ ],
2261
+ "dtype": "float16",
2262
+ "format": "f32-to-bf16",
2263
+ "nbytes": 22544384,
2264
+ "byteOffset": 8392704
2265
+ },
2266
+ {
2267
+ "name": "model.layers.8.post_attention_layernorm.weight",
2268
+ "shape": [
2269
+ 2048
2270
+ ],
2271
+ "dtype": "float16",
2272
+ "format": "f32-to-bf16",
2273
+ "nbytes": 4096,
2274
+ "byteOffset": 30937088
2275
+ },
2276
+ {
2277
+ "name": "model.layers.8.self_attn.c_attn.bias",
2278
+ "shape": [
2279
+ 6144
2280
+ ],
2281
+ "dtype": "float16",
2282
+ "format": "f32-to-bf16",
2283
+ "nbytes": 12288,
2284
+ "byteOffset": 30941184
2285
+ }
2286
+ ],
2287
+ "md5sum": "dd4892ac9920883590d988c7a442ec0e"
2288
+ },
2289
+ {
2290
+ "dataPath": "params_shard_71.bin",
2291
+ "format": "raw-shard",
2292
+ "nbytes": 45088768,
2293
+ "records": [
2294
+ {
2295
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
2296
+ "shape": [
2297
+ 11008,
2298
+ 2048
2299
+ ],
2300
+ "dtype": "float16",
2301
+ "format": "f32-to-bf16",
2302
+ "nbytes": 45088768,
2303
+ "byteOffset": 0
2304
+ }
2305
+ ],
2306
+ "md5sum": "396e3d7bfa6e3f6cda19abbb40b9c81f"
2307
+ },
2308
+ {
2309
+ "dataPath": "params_shard_72.bin",
2310
+ "format": "raw-shard",
2311
+ "nbytes": 25165824,
2312
+ "records": [
2313
+ {
2314
+ "name": "model.layers.9.self_attn.c_attn.weight",
2315
+ "shape": [
2316
+ 6144,
2317
+ 2048
2318
+ ],
2319
+ "dtype": "float16",
2320
+ "format": "f32-to-bf16",
2321
+ "nbytes": 25165824,
2322
+ "byteOffset": 0
2323
+ }
2324
+ ],
2325
+ "md5sum": "a9fe1c41e97698293f83322043b116ab"
2326
+ },
2327
+ {
2328
+ "dataPath": "params_shard_73.bin",
2329
+ "format": "raw-shard",
2330
+ "nbytes": 30953472,
2331
+ "records": [
2332
+ {
2333
+ "name": "model.layers.8.self_attn.o_proj.weight",
2334
+ "shape": [
2335
+ 2048,
2336
+ 2048
2337
+ ],
2338
+ "dtype": "float16",
2339
+ "format": "f32-to-bf16",
2340
+ "nbytes": 8388608,
2341
+ "byteOffset": 0
2342
+ },
2343
+ {
2344
+ "name": "model.layers.9.input_layernorm.weight",
2345
+ "shape": [
2346
+ 2048
2347
+ ],
2348
+ "dtype": "float16",
2349
+ "format": "f32-to-bf16",
2350
+ "nbytes": 4096,
2351
+ "byteOffset": 8388608
2352
+ },
2353
+ {
2354
+ "name": "model.layers.9.mlp.down_proj.weight",
2355
+ "shape": [
2356
+ 2048,
2357
+ 5504
2358
+ ],
2359
+ "dtype": "float16",
2360
+ "format": "f32-to-bf16",
2361
+ "nbytes": 22544384,
2362
+ "byteOffset": 8392704
2363
+ },
2364
+ {
2365
+ "name": "model.layers.9.post_attention_layernorm.weight",
2366
+ "shape": [
2367
+ 2048
2368
+ ],
2369
+ "dtype": "float16",
2370
+ "format": "f32-to-bf16",
2371
+ "nbytes": 4096,
2372
+ "byteOffset": 30937088
2373
+ },
2374
+ {
2375
+ "name": "model.layers.9.self_attn.c_attn.bias",
2376
+ "shape": [
2377
+ 6144
2378
+ ],
2379
+ "dtype": "float16",
2380
+ "format": "f32-to-bf16",
2381
+ "nbytes": 12288,
2382
+ "byteOffset": 30941184
2383
+ }
2384
+ ],
2385
+ "md5sum": "9c66125e43d4a5db730097c03f9afc4a"
2386
+ },
2387
+ {
2388
+ "dataPath": "params_shard_74.bin",
2389
+ "format": "raw-shard",
2390
+ "nbytes": 8392704,
2391
+ "records": [
2392
+ {
2393
+ "name": "model.layers.9.self_attn.o_proj.weight",
2394
+ "shape": [
2395
+ 2048,
2396
+ 2048
2397
+ ],
2398
+ "dtype": "float16",
2399
+ "format": "f32-to-bf16",
2400
+ "nbytes": 8388608,
2401
+ "byteOffset": 0
2402
+ },
2403
+ {
2404
+ "name": "model.norm.weight",
2405
+ "shape": [
2406
+ 2048
2407
+ ],
2408
+ "dtype": "float16",
2409
+ "format": "f32-to-bf16",
2410
+ "nbytes": 4096,
2411
+ "byteOffset": 8388608
2412
+ }
2413
+ ],
2414
+ "md5sum": "9139274d063a3faf7d47ce9aaa9431a7"
2415
+ }
2416
+ ]
2417
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c9cad8debb59070745e4ae08c3264c4b4c96216110344bd92938023ed656ed4
3
+ size 622329856
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ed510d1449e103c13c03fcba449e6c6c17a6bb9317e4186206a1fec6001ac20
3
+ size 622329856
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ab5c0a3d4c30ad21bf2ea7237d5bf5795af45f7405e825855fdf5386406f168
3
+ size 30953472
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c58385f923b32b2012e350c4e74b68c71f78622afd477a75444ee2011724201
3
+ size 45088768
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dc594c653c1d71ef16143aeca5c64f7534c87359daaee1ad5be50eec8fdfc51
3
+ size 25165824
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440d604b6925c3c6975a1f96e7a0f2bf1e338574070b9437195d6eced7e04087
3
+ size 30953472
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce233844fa25ebce328a5d691748bf2ed98dbaeb90a5d817735d45be33f3d03f
3
+ size 45088768
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27b460c7972ef2e8b2b7da05058aead1965ee05e83d0ec82315af6100e5114c
3
+ size 25165824
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df6d0fd4b76e1d5a06fdd53dd06a06ef9e9ce889a308b5c0f907259bf9a7d46a
3
+ size 30953472
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf83a17300015fd012138b9d0e434c361c49871bb61bf7336faaf7af0976e17
3
+ size 45088768
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6446362774ccaf70ff18071a7aa979de400e7f923e38b08c3fb40c6d42317f5e
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ced80a98a1dd8da97d194a7b29bbd3a4e2cf939276d1f2f97dd723eca5e26d6
3
+ size 30953472
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac6cc7c2333fb8d09007bb95d7fdc7ea6301ad0a43b16d60424bf0d2848e94b
3
+ size 45088768
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9652d50ce82ee62e2efb00cc1b5dc3ed1e75ecb741a17b4349636076c9df4e5d
3
+ size 45088768
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:001509078e8e51c5ff3e6d61b5755219d654a93df14b225b67fc44b0007b2d54
3
+ size 25165824
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bead153ead91d8e9ac61e6ccab213b96c5ab50398b186682a2ae54ab147eb9e8
3
+ size 30953472
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039f96254b9c801f4f18a948126eb7a254d75a59c9b394ee73516b789dc6a205
3
+ size 45088768
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e19a65d38d11e143eaea91ff3f7de4916a1969f510e6181067940f9eabf89f
3
+ size 25165824
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f53366184657bad4818e5ac127b7f0a5cb8dfa6b33792d3ebeb56e33a9a6276a
3
+ size 30953472
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334de12cf2d68c8176223bbdc751259196054b39131343ae54976d75014ce5c9
3
+ size 45088768
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c0dc02fa3a776364c69ba4ae46ea49a7adf051a974186fa0af40e6fe024033
3
+ size 25165824
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80acd7393f142e286ff3ea913c177f017c5ba300dabc4a0301e94119c0fa9f4a
3
+ size 30953472
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dba39ea62c3c109ba172c659d914450d16ac6e8b424b73607120e83f94af1d5
3
+ size 45088768
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ff9818599e4c556169cfd9f3a0d6a8c3b1ad219c6cba393230dd352b0540177
3
+ size 25165824
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9122ff544b907d827a5cb282f1a3e6aea29e5c67ddd6e82631ea9752bc49c86f
3
+ size 25165824
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31aec43b21a21fab6fd5e1e0b81c8d912e0d838781ae11332f98cbf91ae066ed
3
+ size 30953472
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ae9d3222e05facbe17789bba97675e6d093f8c86327260e4f9cdec018340593
3
+ size 45088768
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:320dab37a4b9a79c399c30163bb4f69f1d1bf20560db926a329bcc03c6810a38
3
+ size 25165824
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:564e2199b8a099a215f5f73d4b9c96f76d57e1f0124089cf260a18aff5167b8e
3
+ size 30953472
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d37d871319ac146b1c93c27a013563bf18c958772d2ce3fc6c5232ebacb975ed
3
+ size 45088768
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba8b98c24dddf3f768bf81eb3c22e2440875436b4c0aef4891ed23ee5ea596bf
3
+ size 25165824
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7d54062cbfa9b0776487a1d34f7725fdef55fb0cf6e0fd4f619e7facdc86a19
3
+ size 30953472
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ee6f276605f7914698766a7d265460dc6874742a4ab50c3028600ee995389e4
3
+ size 45088768
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56ae0738d2ef7d25aac3964eded9fa00284dd3a400ce0b2a7ea99b3c552ad1d6
3
+ size 25165824
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0847f1129b54ca310e1c167bf555d33aca55b4892cceed51ec47302c5e78b27
3
+ size 22544384
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25475dd10c0476452298bca5512d3d77bd1296b5348bf202e94324af69c89803
3
+ size 30953472
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5eecbe3668ca8c6132400da7cd69bdb073734bde0f4be4bb764befade875966
3
+ size 45088768
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03712ffa2006e1941a368f1a47e0bf1061f8043e7d1b3ef5c1d6ae3a0ac51396
3
+ size 25165824
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c751ff8b26f9ea69e02eb0e18c3a403588bf36638f6be7be15ed16538bf0c13e
3
+ size 30953472
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c644dc14797a6dac737e7601b87384a0e84581d4602fb0fb382f0444d41f2796
3
+ size 45088768
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de25db1264b21659cfb1fa0abdfa6cabf8f7bd7f90fc81342896e9078bc058e3
3
+ size 25165824
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ef21290bad7cb36cd0a91ce163dc0ecfa34fec5e42238b2195f7ec7bbf736fa
3
+ size 30953472
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34efc941f810c68f578c4a64d50e1e31cb3a951a216ba92e992259970fa153ad
3
+ size 45088768
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f313fd79a52131d1d7aee8e078a1798e400f73fb962dd2a20dcc4764deb934b9
3
+ size 25165824
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:064ea88c5baef445e730fe353bfc2eabda808988a91a73288942be72ce9fda79
3
+ size 30953472
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:253b36367bdfb562b6b694a921d6d46e9019cf99a7aeb7c3d71c1759ae549b39
3
+ size 45088768