Davidqian123 committed on
Commit
5d0a900
·
verified ·
1 Parent(s): 4588c6c

Upload 7 files

Browse files
phi-3-5-mini-instruct-quantized/dequantizer.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9746bfa8958ff0762cbb9f2a6c9b617d787a1badb4d1558d0eaa0c1ddacd054
3
+ size 181
phi-3-5-mini-instruct-quantized/genai_config.json ADDED
@@ -0,0 +1,602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 1,
4
+ "context_length": 4096,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": [],
9
+ "log_severity_level": 0
10
+ },
11
+ "filename": "model.onnx",
12
+ "head_size": 96,
13
+ "hidden_size": 3072,
14
+ "inputs": {
15
+ "input_ids": "input_ids",
16
+ "attention_mask": "attention_mask_before_processor",
17
+ "position_ids": "position_ids",
18
+ "past_key_names": "past_key_%d_in",
19
+ "past_value_names": "past_value_%d_in"
20
+ },
21
+ "outputs": {
22
+ "logits": "logits_dequantized",
23
+ "present_key_names": "past_key_%d_out",
24
+ "present_value_names": "past_value_%d_out"
25
+ },
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 32,
28
+ "num_key_value_heads": 32,
29
+ "sliding_window_key_value_cache": {
30
+ "window_size": 128,
31
+ "pad_value": 128
32
+ },
33
+ "pipeline": [
34
+ {
35
+ "position_processor": {
36
+ "filename": "position-processor.onnx",
37
+ "inputs": [
38
+ "attention_mask_before_processor",
39
+ "position_ids"
40
+ ],
41
+ "outputs": [
42
+ "attention_mask_before_quantizer",
43
+ "position_ids_cos_before_quantizer",
44
+ "position_ids_sin_before_quantizer"
45
+ ],
46
+ "session_options": {
47
+ "log_id": "onnxruntime-genai.position_processor",
48
+ "provider_options": [
49
+ {}
50
+ ]
51
+ },
52
+ "run_on_token_gen": false
53
+ },
54
+ "position_shifter": {
55
+ "filename": "position-shifter.onnx",
56
+ "inputs": [
57
+ "attention_mask_before_processor",
58
+ "position_ids"
59
+ ],
60
+ "outputs": [
61
+ "attention_mask_shifted",
62
+ "position_ids_shifted",
63
+ "attention_mask_before_quantizer",
64
+ "position_ids_cos_before_quantizer",
65
+ "position_ids_sin_before_quantizer"
66
+ ],
67
+ "session_options": {
68
+ "log_id": "onnxruntime-genai.position_shifter",
69
+ "provider_options": [
70
+ {}
71
+ ]
72
+ },
73
+ "run_on_prompt": false
74
+ },
75
+ "quantizer": {
76
+ "filename": "quantizer.onnx",
77
+ "inputs": [
78
+ "attention_mask_before_quantizer",
79
+ "position_ids_cos_before_quantizer",
80
+ "position_ids_sin_before_quantizer"
81
+ ],
82
+ "outputs": [
83
+ "attention_mask",
84
+ "position_ids_cos",
85
+ "position_ids_sin"
86
+ ],
87
+ "session_options": {
88
+ "log_id": "onnxruntime-genai.quantizer",
89
+ "provider_options": [
90
+ {}
91
+ ]
92
+ }
93
+ },
94
+ "prompt-processor-1": {
95
+ "filename": "ar128_cl4096_1_of_4_qnn_ctx.onnx",
96
+ "inputs": [
97
+ "input_ids",
98
+ "past_key_0_in",
99
+ "past_key_5_in",
100
+ "past_value_5_in",
101
+ "past_value_0_in",
102
+ "past_key_6_in",
103
+ "past_value_6_in",
104
+ "past_key_7_in",
105
+ "past_value_7_in",
106
+ "past_key_1_in",
107
+ "past_value_1_in",
108
+ "past_key_2_in",
109
+ "past_value_2_in",
110
+ "past_key_3_in",
111
+ "past_value_3_in",
112
+ "past_key_4_in",
113
+ "past_value_4_in",
114
+ "position_ids_cos",
115
+ "position_ids_sin",
116
+ "attention_mask"
117
+ ],
118
+ "outputs": [
119
+ "past_value_0_out",
120
+ "past_key_0_out",
121
+ "past_value_1_out",
122
+ "past_key_1_out",
123
+ "past_value_2_out",
124
+ "past_key_2_out",
125
+ "past_value_3_out",
126
+ "past_key_3_out",
127
+ "past_value_4_out",
128
+ "past_key_4_out",
129
+ "past_value_5_out",
130
+ "past_key_5_out",
131
+ "past_value_6_out",
132
+ "past_key_6_out",
133
+ "past_value_7_out",
134
+ "past_key_7_out",
135
+ "_model_layers_7_Add_1_Add_output_0"
136
+ ],
137
+ "session_options": {
138
+ "log_id": "onnxruntime-genai.pp1",
139
+ "provider_options": [
140
+ {
141
+ "qnn": {
142
+ "backend_path": "QnnHtp.dll",
143
+ "htp_performance_mode": "burst",
144
+ "enable_htp_shared_memory_allocator": "1",
145
+ "qnn_context_priority": "high"
146
+ }
147
+ }
148
+ ]
149
+ },
150
+ "run_on_token_gen": false
151
+ },
152
+ "prompt-processor-2": {
153
+ "filename": "ar128_cl4096_2_of_4_qnn_ctx.onnx",
154
+ "inputs": [
155
+ "_model_layers_7_Add_1_Add_output_0",
156
+ "past_key_8_in",
157
+ "past_key_13_in",
158
+ "past_value_13_in",
159
+ "past_value_8_in",
160
+ "past_key_14_in",
161
+ "past_value_14_in",
162
+ "past_key_15_in",
163
+ "past_value_15_in",
164
+ "past_key_9_in",
165
+ "past_value_9_in",
166
+ "past_key_10_in",
167
+ "past_value_10_in",
168
+ "past_key_11_in",
169
+ "past_value_11_in",
170
+ "past_key_12_in",
171
+ "past_value_12_in",
172
+ "position_ids_cos",
173
+ "position_ids_sin",
174
+ "attention_mask"
175
+ ],
176
+ "outputs": [
177
+ "past_value_8_out",
178
+ "past_key_8_out",
179
+ "past_value_9_out",
180
+ "past_key_9_out",
181
+ "past_value_10_out",
182
+ "past_key_10_out",
183
+ "past_value_11_out",
184
+ "past_key_11_out",
185
+ "past_value_12_out",
186
+ "past_key_12_out",
187
+ "past_value_13_out",
188
+ "past_key_13_out",
189
+ "past_value_14_out",
190
+ "past_key_14_out",
191
+ "past_value_15_out",
192
+ "past_key_15_out",
193
+ "_model_layers_15_Add_1_Add_output_0"
194
+ ],
195
+ "session_options": {
196
+ "log_id": "onnxruntime-genai.pp2",
197
+ "provider_options": [
198
+ {
199
+ "qnn": {
200
+ "backend_path": "QnnHtp.dll",
201
+ "htp_performance_mode": "burst",
202
+ "enable_htp_shared_memory_allocator": "1",
203
+ "qnn_context_priority": "high"
204
+ }
205
+ }
206
+ ]
207
+ },
208
+ "run_on_token_gen": false
209
+ },
210
+ "prompt-processor-3": {
211
+ "filename": "ar128_cl4096_3_of_4_qnn_ctx.onnx",
212
+ "inputs": [
213
+ "_model_layers_15_Add_1_Add_output_0",
214
+ "past_key_16_in",
215
+ "past_key_21_in",
216
+ "past_value_21_in",
217
+ "past_value_16_in",
218
+ "past_key_22_in",
219
+ "past_value_22_in",
220
+ "past_key_23_in",
221
+ "past_value_23_in",
222
+ "past_key_17_in",
223
+ "past_value_17_in",
224
+ "past_key_18_in",
225
+ "past_value_18_in",
226
+ "past_key_19_in",
227
+ "past_value_19_in",
228
+ "past_key_20_in",
229
+ "past_value_20_in",
230
+ "position_ids_cos",
231
+ "position_ids_sin",
232
+ "attention_mask"
233
+ ],
234
+ "outputs": [
235
+ "past_value_16_out",
236
+ "past_key_16_out",
237
+ "past_value_17_out",
238
+ "past_key_17_out",
239
+ "past_value_18_out",
240
+ "past_key_18_out",
241
+ "past_value_19_out",
242
+ "past_key_19_out",
243
+ "past_value_20_out",
244
+ "past_key_20_out",
245
+ "past_value_21_out",
246
+ "past_key_21_out",
247
+ "past_value_22_out",
248
+ "past_key_22_out",
249
+ "past_value_23_out",
250
+ "past_key_23_out",
251
+ "_model_layers_23_Add_1_Add_output_0"
252
+ ],
253
+ "session_options": {
254
+ "log_id": "onnxruntime-genai.pp3",
255
+ "provider_options": [
256
+ {
257
+ "qnn": {
258
+ "backend_path": "QnnHtp.dll",
259
+ "htp_performance_mode": "burst",
260
+ "enable_htp_shared_memory_allocator": "1",
261
+ "qnn_context_priority": "high"
262
+ }
263
+ }
264
+ ]
265
+ },
266
+ "run_on_token_gen": false
267
+ },
268
+ "prompt-processor-4": {
269
+ "filename": "ar128_cl4096_4_of_4_qnn_ctx.onnx",
270
+ "inputs": [
271
+ "_model_layers_23_Add_1_Add_output_0",
272
+ "past_key_24_in",
273
+ "past_key_29_in",
274
+ "past_value_29_in",
275
+ "past_value_24_in",
276
+ "past_key_30_in",
277
+ "past_value_30_in",
278
+ "past_key_31_in",
279
+ "past_value_31_in",
280
+ "past_key_25_in",
281
+ "past_value_25_in",
282
+ "past_key_26_in",
283
+ "past_value_26_in",
284
+ "past_key_27_in",
285
+ "past_value_27_in",
286
+ "past_key_28_in",
287
+ "past_value_28_in",
288
+ "position_ids_cos",
289
+ "position_ids_sin",
290
+ "attention_mask"
291
+ ],
292
+ "outputs": [
293
+ "past_value_24_out",
294
+ "past_key_24_out",
295
+ "past_value_25_out",
296
+ "past_key_25_out",
297
+ "past_value_26_out",
298
+ "past_key_26_out",
299
+ "past_value_27_out",
300
+ "past_key_27_out",
301
+ "past_value_28_out",
302
+ "past_key_28_out",
303
+ "past_value_29_out",
304
+ "past_key_29_out",
305
+ "past_value_30_out",
306
+ "past_key_30_out",
307
+ "past_value_31_out",
308
+ "past_key_31_out",
309
+ "logits"
310
+ ],
311
+ "session_options": {
312
+ "log_id": "onnxruntime-genai.pp4",
313
+ "provider_options": [
314
+ {
315
+ "qnn": {
316
+ "backend_path": "QnnHtp.dll",
317
+ "htp_performance_mode": "burst",
318
+ "enable_htp_shared_memory_allocator": "1",
319
+ "qnn_context_priority": "high"
320
+ }
321
+ }
322
+ ]
323
+ },
324
+ "run_on_token_gen": false
325
+ },
326
+ "token-generator-1": {
327
+ "filename": "ar1_cl4096_1_of_4_qnn_ctx.onnx",
328
+ "inputs": [
329
+ "input_ids",
330
+ "past_key_0_in",
331
+ "past_key_5_in",
332
+ "past_value_5_in",
333
+ "past_value_0_in",
334
+ "past_key_6_in",
335
+ "past_value_6_in",
336
+ "past_key_7_in",
337
+ "past_value_7_in",
338
+ "past_key_1_in",
339
+ "past_value_1_in",
340
+ "past_key_2_in",
341
+ "past_value_2_in",
342
+ "past_key_3_in",
343
+ "past_value_3_in",
344
+ "past_key_4_in",
345
+ "past_value_4_in",
346
+ "position_ids_cos",
347
+ "position_ids_sin",
348
+ "attention_mask"
349
+ ],
350
+ "outputs": [
351
+ "past_value_0_out",
352
+ "past_key_0_out",
353
+ "past_value_1_out",
354
+ "past_key_1_out",
355
+ "past_value_2_out",
356
+ "past_key_2_out",
357
+ "past_value_3_out",
358
+ "past_key_3_out",
359
+ "past_value_4_out",
360
+ "past_key_4_out",
361
+ "past_value_5_out",
362
+ "past_key_5_out",
363
+ "past_value_6_out",
364
+ "past_key_6_out",
365
+ "past_value_7_out",
366
+ "past_key_7_out",
367
+ "_model_layers_7_Add_1_Add_output_0"
368
+ ],
369
+ "session_options": {
370
+ "log_id": "onnxruntime-genai.tg1",
371
+ "provider_options": [
372
+ {
373
+ "qnn": {
374
+ "backend_path": "QnnHtp.dll",
375
+ "htp_performance_mode": "burst",
376
+ "enable_htp_shared_memory_allocator": "1",
377
+ "qnn_context_priority": "high"
378
+ }
379
+ }
380
+ ]
381
+ },
382
+ "run_on_prompt": false
383
+ },
384
+ "token-generator-2": {
385
+ "filename": "ar1_cl4096_2_of_4_qnn_ctx.onnx",
386
+ "inputs": [
387
+ "_model_layers_7_Add_1_Add_output_0",
388
+ "past_key_8_in",
389
+ "past_key_13_in",
390
+ "past_value_13_in",
391
+ "past_value_8_in",
392
+ "past_key_14_in",
393
+ "past_value_14_in",
394
+ "past_key_15_in",
395
+ "past_value_15_in",
396
+ "past_key_9_in",
397
+ "past_value_9_in",
398
+ "past_key_10_in",
399
+ "past_value_10_in",
400
+ "past_key_11_in",
401
+ "past_value_11_in",
402
+ "past_key_12_in",
403
+ "past_value_12_in",
404
+ "position_ids_cos",
405
+ "position_ids_sin",
406
+ "attention_mask"
407
+ ],
408
+ "outputs": [
409
+ "past_value_8_out",
410
+ "past_key_8_out",
411
+ "past_value_9_out",
412
+ "past_key_9_out",
413
+ "past_value_10_out",
414
+ "past_key_10_out",
415
+ "past_value_11_out",
416
+ "past_key_11_out",
417
+ "past_value_12_out",
418
+ "past_key_12_out",
419
+ "past_value_13_out",
420
+ "past_key_13_out",
421
+ "past_value_14_out",
422
+ "past_key_14_out",
423
+ "past_value_15_out",
424
+ "past_key_15_out",
425
+ "_model_layers_15_Add_1_Add_output_0"
426
+ ],
427
+ "session_options": {
428
+ "log_id": "onnxruntime-genai.tg2",
429
+ "provider_options": [
430
+ {
431
+ "qnn": {
432
+ "backend_path": "QnnHtp.dll",
433
+ "htp_performance_mode": "burst",
434
+ "enable_htp_shared_memory_allocator": "1",
435
+ "qnn_context_priority": "high"
436
+ }
437
+ }
438
+ ]
439
+ },
440
+ "run_on_prompt": false
441
+ },
442
+ "token-generator-3": {
443
+ "filename": "ar1_cl4096_3_of_4_qnn_ctx.onnx",
444
+ "inputs": [
445
+ "_model_layers_15_Add_1_Add_output_0",
446
+ "past_key_16_in",
447
+ "past_key_21_in",
448
+ "past_value_21_in",
449
+ "past_value_16_in",
450
+ "past_key_22_in",
451
+ "past_value_22_in",
452
+ "past_key_23_in",
453
+ "past_value_23_in",
454
+ "past_key_17_in",
455
+ "past_value_17_in",
456
+ "past_key_18_in",
457
+ "past_value_18_in",
458
+ "past_key_19_in",
459
+ "past_value_19_in",
460
+ "past_key_20_in",
461
+ "past_value_20_in",
462
+ "position_ids_cos",
463
+ "position_ids_sin",
464
+ "attention_mask"
465
+ ],
466
+ "outputs": [
467
+ "past_value_16_out",
468
+ "past_key_16_out",
469
+ "past_value_17_out",
470
+ "past_key_17_out",
471
+ "past_value_18_out",
472
+ "past_key_18_out",
473
+ "past_value_19_out",
474
+ "past_key_19_out",
475
+ "past_value_20_out",
476
+ "past_key_20_out",
477
+ "past_value_21_out",
478
+ "past_key_21_out",
479
+ "past_value_22_out",
480
+ "past_key_22_out",
481
+ "past_value_23_out",
482
+ "past_key_23_out",
483
+ "_model_layers_23_Add_1_Add_output_0"
484
+ ],
485
+ "session_options": {
486
+ "log_id": "onnxruntime-genai.tg3",
487
+ "provider_options": [
488
+ {
489
+ "qnn": {
490
+ "backend_path": "QnnHtp.dll",
491
+ "htp_performance_mode": "burst",
492
+ "enable_htp_shared_memory_allocator": "1",
493
+ "qnn_context_priority": "high"
494
+ }
495
+ }
496
+ ]
497
+ },
498
+ "run_on_prompt": false
499
+ },
500
+ "token-generator-4": {
501
+ "filename": "ar1_cl4096_4_of_4_qnn_ctx.onnx",
502
+ "inputs": [
503
+ "_model_layers_23_Add_1_Add_output_0",
504
+ "past_key_24_in",
505
+ "past_key_29_in",
506
+ "past_value_29_in",
507
+ "past_value_24_in",
508
+ "past_key_30_in",
509
+ "past_value_30_in",
510
+ "past_key_31_in",
511
+ "past_value_31_in",
512
+ "past_key_25_in",
513
+ "past_value_25_in",
514
+ "past_key_26_in",
515
+ "past_value_26_in",
516
+ "past_key_27_in",
517
+ "past_value_27_in",
518
+ "past_key_28_in",
519
+ "past_value_28_in",
520
+ "position_ids_cos",
521
+ "position_ids_sin",
522
+ "attention_mask"
523
+ ],
524
+ "outputs": [
525
+ "past_value_24_out",
526
+ "past_key_24_out",
527
+ "past_value_25_out",
528
+ "past_key_25_out",
529
+ "past_value_26_out",
530
+ "past_key_26_out",
531
+ "past_value_27_out",
532
+ "past_key_27_out",
533
+ "past_value_28_out",
534
+ "past_key_28_out",
535
+ "past_value_29_out",
536
+ "past_key_29_out",
537
+ "past_value_30_out",
538
+ "past_key_30_out",
539
+ "past_value_31_out",
540
+ "past_key_31_out",
541
+ "logits"
542
+ ],
543
+ "session_options": {
544
+ "log_id": "onnxruntime-genai.tg4",
545
+ "provider_options": [
546
+ {
547
+ "qnn": {
548
+ "backend_path": "QnnHtp.dll",
549
+ "htp_performance_mode": "burst",
550
+ "enable_htp_shared_memory_allocator": "1",
551
+ "qnn_context_priority": "high"
552
+ }
553
+ }
554
+ ]
555
+ },
556
+ "run_on_prompt": false
557
+ },
558
+ "dequantizer": {
559
+ "filename": "dequantizer.onnx",
560
+ "inputs": [
561
+ "logits"
562
+ ],
563
+ "outputs": [
564
+ "logits_dequantized"
565
+ ],
566
+ "session_options": {
567
+ "log_id": "onnxruntime-genai.dequantizer",
568
+ "provider_options": [
569
+ {}
570
+ ]
571
+ }
572
+ }
573
+ }
574
+ ]
575
+ },
576
+ "eos_token_id": [
577
+ 32007,
578
+ 32001,
579
+ 32000,
580
+ 2
581
+ ],
582
+ "pad_token_id": 32000,
583
+ "type": "decoder-pipeline",
584
+ "vocab_size": 32064
585
+ },
586
+ "search": {
587
+ "diversity_penalty": 0.0,
588
+ "do_sample": false,
589
+ "early_stopping": true,
590
+ "length_penalty": 1.0,
591
+ "max_length": 131072,
592
+ "min_length": 0,
593
+ "no_repeat_ngram_size": 0,
594
+ "num_beams": 1,
595
+ "num_return_sequences": 1,
596
+ "past_present_share_buffer": true,
597
+ "repetition_penalty": 1.0,
598
+ "temperature": 1.0,
599
+ "top_k": 1,
600
+ "top_p": 1.0
601
+ }
602
+ }
phi-3-5-mini-instruct-quantized/position-processor.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e941a866d33c8d8bfad0b70549d5877abee2051517278cbf33ac7b0cefcc854d
3
+ size 2246935
phi-3-5-mini-instruct-quantized/quantizer.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea1ae96fc3498091134adf77c221c4cc4f9ce4621e882e58623670d7eb16ebb2
3
+ size 59682
phi-3-5-mini-instruct-quantized/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
phi-3-5-mini-instruct-quantized/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
phi-3-5-mini-instruct-quantized/tokenizer_config.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": true,
27
+ "single_word": false,
28
+ "special": false
29
+ },
30
+ "32000": {
31
+ "content": "<|endoftext|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32001": {
39
+ "content": "<|assistant|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": true,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "32002": {
47
+ "content": "<|placeholder1|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": true,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "32003": {
55
+ "content": "<|placeholder2|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": true,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "32004": {
63
+ "content": "<|placeholder3|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": true,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "32005": {
71
+ "content": "<|placeholder4|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": true,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "32006": {
79
+ "content": "<|system|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": true,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "32007": {
87
+ "content": "<|end|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": true,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "32008": {
95
+ "content": "<|placeholder5|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": true,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "32009": {
103
+ "content": "<|placeholder6|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": true,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "32010": {
111
+ "content": "<|user|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": true,
115
+ "single_word": false,
116
+ "special": true
117
+ }
118
+ },
119
+ "bos_token": "<s>",
120
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
121
+ "clean_up_tokenization_spaces": false,
122
+ "eos_token": "<|endoftext|>",
123
+ "legacy": false,
124
+ "model_max_length": 131072,
125
+ "pad_token": "<|endoftext|>",
126
+ "padding_side": "left",
127
+ "sp_model_kwargs": {},
128
+ "tokenizer_class": "LlamaTokenizer",
129
+ "unk_token": "<unk>",
130
+ "use_default_system_prompt": false
131
+ }