Spaces:
Running
Running
Yang Gu
committed on
Commit
·
fa2d4e3
1
Parent(s):
37cf2c3
Support ort-phi3 and polish ort-phi2
Browse files- demo/ort-phi2/index.html +3 -3
- demo/ort-phi2/models/phi2-int4/tokenizer.json +0 -0
- demo/ort-phi2/models/phi2-int4/tokenizer_config.json +323 -0
- demo/ort-phi3/index.html +375 -0
- demo/ort-phi3/models/phi3-int4/tokenizer.json +0 -0
- demo/ort-phi3/models/phi3-int4/tokenizer_config.json +349 -0
- main.js +12 -9
demo/ort-phi2/index.html
CHANGED
@@ -7,8 +7,8 @@
|
|
7 |
|
8 |
<body>
|
9 |
<script src="../../util.js"></script>
|
10 |
-
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@dev/dist/ort.webgpu.min.js"></script>
|
11 |
-
|
12 |
|
13 |
<script type="module">
|
14 |
import { AutoTokenizer, env } from '../../transformers/transformers.js';
|
@@ -88,7 +88,7 @@
|
|
88 |
const model_config = JSON.parse(textDecoder.decode(json_bytes));
|
89 |
|
90 |
const model_bytes = await fetchAndCache(modelPath + "/phi2-int4.onnx");
|
91 |
-
const externaldata = (model.externaldata) ? await fetchAndCache(modelPath + '/phi2-int4.
|
92 |
let modelSize = model_bytes.byteLength;
|
93 |
if (externaldata) {
|
94 |
modelSize += externaldata.byteLength;
|
|
|
7 |
|
8 |
<body>
|
9 |
<script src="../../util.js"></script>
|
10 |
+
<!-- <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@dev/dist/ort.webgpu.min.js"></script> -->
|
11 |
+
<script src="https://wp-27.sh.intel.com/workspace/project/onnxruntime/js/web/dist/ort.webgpu.min.js"></script>
|
12 |
|
13 |
<script type="module">
|
14 |
import { AutoTokenizer, env } from '../../transformers/transformers.js';
|
|
|
88 |
const model_config = JSON.parse(textDecoder.decode(json_bytes));
|
89 |
|
90 |
const model_bytes = await fetchAndCache(modelPath + "/phi2-int4.onnx");
|
91 |
+
const externaldata = (model.externaldata) ? await fetchAndCache(modelPath + '/phi2-int4.data') : false;
|
92 |
let modelSize = model_bytes.byteLength;
|
93 |
if (externaldata) {
|
94 |
modelSize += externaldata.byteLength;
|
demo/ort-phi2/models/phi2-int4/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
demo/ort-phi2/models/phi2-int4/tokenizer_config.json
ADDED
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"50256": {
|
5 |
+
"content": "<|endoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"50257": {
|
13 |
+
"content": " ",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": false
|
19 |
+
},
|
20 |
+
"50258": {
|
21 |
+
"content": " ",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": false
|
27 |
+
},
|
28 |
+
"50259": {
|
29 |
+
"content": " ",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": false
|
35 |
+
},
|
36 |
+
"50260": {
|
37 |
+
"content": " ",
|
38 |
+
"lstrip": false,
|
39 |
+
"normalized": true,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": false
|
43 |
+
},
|
44 |
+
"50261": {
|
45 |
+
"content": " ",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": true,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false,
|
50 |
+
"special": false
|
51 |
+
},
|
52 |
+
"50262": {
|
53 |
+
"content": " ",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": true,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false,
|
58 |
+
"special": false
|
59 |
+
},
|
60 |
+
"50263": {
|
61 |
+
"content": " ",
|
62 |
+
"lstrip": false,
|
63 |
+
"normalized": true,
|
64 |
+
"rstrip": false,
|
65 |
+
"single_word": false,
|
66 |
+
"special": false
|
67 |
+
},
|
68 |
+
"50264": {
|
69 |
+
"content": " ",
|
70 |
+
"lstrip": false,
|
71 |
+
"normalized": true,
|
72 |
+
"rstrip": false,
|
73 |
+
"single_word": false,
|
74 |
+
"special": false
|
75 |
+
},
|
76 |
+
"50265": {
|
77 |
+
"content": " ",
|
78 |
+
"lstrip": false,
|
79 |
+
"normalized": true,
|
80 |
+
"rstrip": false,
|
81 |
+
"single_word": false,
|
82 |
+
"special": false
|
83 |
+
},
|
84 |
+
"50266": {
|
85 |
+
"content": " ",
|
86 |
+
"lstrip": false,
|
87 |
+
"normalized": true,
|
88 |
+
"rstrip": false,
|
89 |
+
"single_word": false,
|
90 |
+
"special": false
|
91 |
+
},
|
92 |
+
"50267": {
|
93 |
+
"content": " ",
|
94 |
+
"lstrip": false,
|
95 |
+
"normalized": true,
|
96 |
+
"rstrip": false,
|
97 |
+
"single_word": false,
|
98 |
+
"special": false
|
99 |
+
},
|
100 |
+
"50268": {
|
101 |
+
"content": " ",
|
102 |
+
"lstrip": false,
|
103 |
+
"normalized": true,
|
104 |
+
"rstrip": false,
|
105 |
+
"single_word": false,
|
106 |
+
"special": false
|
107 |
+
},
|
108 |
+
"50269": {
|
109 |
+
"content": " ",
|
110 |
+
"lstrip": false,
|
111 |
+
"normalized": true,
|
112 |
+
"rstrip": false,
|
113 |
+
"single_word": false,
|
114 |
+
"special": false
|
115 |
+
},
|
116 |
+
"50270": {
|
117 |
+
"content": " ",
|
118 |
+
"lstrip": false,
|
119 |
+
"normalized": true,
|
120 |
+
"rstrip": false,
|
121 |
+
"single_word": false,
|
122 |
+
"special": false
|
123 |
+
},
|
124 |
+
"50271": {
|
125 |
+
"content": " ",
|
126 |
+
"lstrip": false,
|
127 |
+
"normalized": true,
|
128 |
+
"rstrip": false,
|
129 |
+
"single_word": false,
|
130 |
+
"special": false
|
131 |
+
},
|
132 |
+
"50272": {
|
133 |
+
"content": " ",
|
134 |
+
"lstrip": false,
|
135 |
+
"normalized": true,
|
136 |
+
"rstrip": false,
|
137 |
+
"single_word": false,
|
138 |
+
"special": false
|
139 |
+
},
|
140 |
+
"50273": {
|
141 |
+
"content": " ",
|
142 |
+
"lstrip": false,
|
143 |
+
"normalized": true,
|
144 |
+
"rstrip": false,
|
145 |
+
"single_word": false,
|
146 |
+
"special": false
|
147 |
+
},
|
148 |
+
"50274": {
|
149 |
+
"content": " ",
|
150 |
+
"lstrip": false,
|
151 |
+
"normalized": true,
|
152 |
+
"rstrip": false,
|
153 |
+
"single_word": false,
|
154 |
+
"special": false
|
155 |
+
},
|
156 |
+
"50275": {
|
157 |
+
"content": " ",
|
158 |
+
"lstrip": false,
|
159 |
+
"normalized": true,
|
160 |
+
"rstrip": false,
|
161 |
+
"single_word": false,
|
162 |
+
"special": false
|
163 |
+
},
|
164 |
+
"50276": {
|
165 |
+
"content": " ",
|
166 |
+
"lstrip": false,
|
167 |
+
"normalized": true,
|
168 |
+
"rstrip": false,
|
169 |
+
"single_word": false,
|
170 |
+
"special": false
|
171 |
+
},
|
172 |
+
"50277": {
|
173 |
+
"content": " ",
|
174 |
+
"lstrip": false,
|
175 |
+
"normalized": true,
|
176 |
+
"rstrip": false,
|
177 |
+
"single_word": false,
|
178 |
+
"special": false
|
179 |
+
},
|
180 |
+
"50278": {
|
181 |
+
"content": " ",
|
182 |
+
"lstrip": false,
|
183 |
+
"normalized": true,
|
184 |
+
"rstrip": false,
|
185 |
+
"single_word": false,
|
186 |
+
"special": false
|
187 |
+
},
|
188 |
+
"50279": {
|
189 |
+
"content": " ",
|
190 |
+
"lstrip": false,
|
191 |
+
"normalized": true,
|
192 |
+
"rstrip": false,
|
193 |
+
"single_word": false,
|
194 |
+
"special": false
|
195 |
+
},
|
196 |
+
"50280": {
|
197 |
+
"content": " ",
|
198 |
+
"lstrip": false,
|
199 |
+
"normalized": true,
|
200 |
+
"rstrip": false,
|
201 |
+
"single_word": false,
|
202 |
+
"special": false
|
203 |
+
},
|
204 |
+
"50281": {
|
205 |
+
"content": " ",
|
206 |
+
"lstrip": false,
|
207 |
+
"normalized": true,
|
208 |
+
"rstrip": false,
|
209 |
+
"single_word": false,
|
210 |
+
"special": false
|
211 |
+
},
|
212 |
+
"50282": {
|
213 |
+
"content": " ",
|
214 |
+
"lstrip": false,
|
215 |
+
"normalized": true,
|
216 |
+
"rstrip": false,
|
217 |
+
"single_word": false,
|
218 |
+
"special": false
|
219 |
+
},
|
220 |
+
"50283": {
|
221 |
+
"content": " ",
|
222 |
+
"lstrip": false,
|
223 |
+
"normalized": true,
|
224 |
+
"rstrip": false,
|
225 |
+
"single_word": false,
|
226 |
+
"special": false
|
227 |
+
},
|
228 |
+
"50284": {
|
229 |
+
"content": " ",
|
230 |
+
"lstrip": false,
|
231 |
+
"normalized": true,
|
232 |
+
"rstrip": false,
|
233 |
+
"single_word": false,
|
234 |
+
"special": false
|
235 |
+
},
|
236 |
+
"50285": {
|
237 |
+
"content": " ",
|
238 |
+
"lstrip": false,
|
239 |
+
"normalized": true,
|
240 |
+
"rstrip": false,
|
241 |
+
"single_word": false,
|
242 |
+
"special": false
|
243 |
+
},
|
244 |
+
"50286": {
|
245 |
+
"content": " ",
|
246 |
+
"lstrip": false,
|
247 |
+
"normalized": true,
|
248 |
+
"rstrip": false,
|
249 |
+
"single_word": false,
|
250 |
+
"special": false
|
251 |
+
},
|
252 |
+
"50287": {
|
253 |
+
"content": "\t\t\t\t\t\t\t\t\t",
|
254 |
+
"lstrip": false,
|
255 |
+
"normalized": true,
|
256 |
+
"rstrip": false,
|
257 |
+
"single_word": false,
|
258 |
+
"special": false
|
259 |
+
},
|
260 |
+
"50288": {
|
261 |
+
"content": "\t\t\t\t\t\t\t\t",
|
262 |
+
"lstrip": false,
|
263 |
+
"normalized": true,
|
264 |
+
"rstrip": false,
|
265 |
+
"single_word": false,
|
266 |
+
"special": false
|
267 |
+
},
|
268 |
+
"50289": {
|
269 |
+
"content": "\t\t\t\t\t\t\t",
|
270 |
+
"lstrip": false,
|
271 |
+
"normalized": true,
|
272 |
+
"rstrip": false,
|
273 |
+
"single_word": false,
|
274 |
+
"special": false
|
275 |
+
},
|
276 |
+
"50290": {
|
277 |
+
"content": "\t\t\t\t\t\t",
|
278 |
+
"lstrip": false,
|
279 |
+
"normalized": true,
|
280 |
+
"rstrip": false,
|
281 |
+
"single_word": false,
|
282 |
+
"special": false
|
283 |
+
},
|
284 |
+
"50291": {
|
285 |
+
"content": "\t\t\t\t\t",
|
286 |
+
"lstrip": false,
|
287 |
+
"normalized": true,
|
288 |
+
"rstrip": false,
|
289 |
+
"single_word": false,
|
290 |
+
"special": false
|
291 |
+
},
|
292 |
+
"50292": {
|
293 |
+
"content": "\t\t\t\t",
|
294 |
+
"lstrip": false,
|
295 |
+
"normalized": true,
|
296 |
+
"rstrip": false,
|
297 |
+
"single_word": false,
|
298 |
+
"special": false
|
299 |
+
},
|
300 |
+
"50293": {
|
301 |
+
"content": "\t\t\t",
|
302 |
+
"lstrip": false,
|
303 |
+
"normalized": true,
|
304 |
+
"rstrip": false,
|
305 |
+
"single_word": false,
|
306 |
+
"special": false
|
307 |
+
},
|
308 |
+
"50294": {
|
309 |
+
"content": "\t\t",
|
310 |
+
"lstrip": false,
|
311 |
+
"normalized": true,
|
312 |
+
"rstrip": false,
|
313 |
+
"single_word": false,
|
314 |
+
"special": false
|
315 |
+
}
|
316 |
+
},
|
317 |
+
"bos_token": "<|endoftext|>",
|
318 |
+
"clean_up_tokenization_spaces": true,
|
319 |
+
"eos_token": "<|endoftext|>",
|
320 |
+
"model_max_length": 2048,
|
321 |
+
"tokenizer_class": "CodeGenTokenizer",
|
322 |
+
"unk_token": "<|endoftext|>"
|
323 |
+
}
|
demo/ort-phi3/index.html
ADDED
@@ -0,0 +1,375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
|
4 |
+
<head>
|
5 |
+
<title>Example</title>
|
6 |
+
</head>
|
7 |
+
|
8 |
+
<body>
|
9 |
+
<script src="../../util.js"></script>
|
10 |
+
<!-- <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@dev/dist/ort.webgpu.min.js"></script> -->
|
11 |
+
<script src="https://wp-27.sh.intel.com/workspace/project/onnxruntime/js/web/dist/ort.webgpu.min.js"></script>
|
12 |
+
|
13 |
+
<script type="module">
|
14 |
+
import { AutoTokenizer, env } from '../../transformers/transformers.js';
|
15 |
+
|
16 |
+
/**
 * Reports a status message: prints it to the browser console and appends it,
 * on a new line, to the text of the element with id "status".
 *
 * @param {*} i - Message to report.
 */
function log(i) {
  console.log(i);
  const statusElement = document.getElementById('status');
  statusElement.innerText = statusElement.innerText + `\n${i}`;
}
|
17 |
+
|
18 |
+
const MODELS = {
|
19 |
+
"phi3": { name: "phi3", path: "phi3-int4", externaldata: true },
|
20 |
+
}
|
21 |
+
|
22 |
+
/**
 * Builds the demo configuration: built-in defaults overridden by the page's
 * URL query string (e.g. `?provider=webgpu&max_tokens=128`).
 *
 * Options whose default is a number are parsed as base-10 integers; all other
 * options are kept as strings. If the resulting `model` value is a key of
 * `MODELS`, it is replaced by the corresponding `MODELS` entry.
 *
 * @returns {object} The resolved configuration.
 * @throws {Error} If the query string contains an unknown option, or a
 *   numeric option whose value is not an integer.
 */
function getConfig() {
  const config = {
    model: "phi3",
    provider: "webgpu",
    profiler: 0,
    verbose: 0,
    threads: 1,
    trace: 0,
    csv: 0,
    max_tokens: 256,
    local: 1,
  };

  const params = new URLSearchParams(window.location.search);
  for (const [key, value] of params) {
    // Own-property check: inherited names such as "toString" are not options.
    if (!Object.hasOwn(config, key)) {
      if (key.length > 0) {
        throw new Error("unknown argument: " + key);
      }
      continue;
    }
    if (typeof config[key] === "number") {
      const parsed = Number.parseInt(value, 10);
      if (Number.isNaN(parsed)) {
        // Fail loudly instead of letting NaN leak into the runtime settings.
        throw new Error(`invalid value for argument ${key}: ${value}`);
      }
      config[key] = parsed;
    } else {
      config[key] = value;
    }
  }

  if (Object.hasOwn(MODELS, config.model)) {
    config.model = MODELS[config.model];
  }
  return config;
}
|
56 |
+
|
57 |
+
/**
 * Greedy-decoding wrapper around an onnxruntime-web `InferenceSession` for a
 * decoder model that takes `input_ids`, `attention_mask`, optional
 * `position_ids` and `past_key_values.N.{key,value}` inputs, and produces
 * `logits` plus `present.N.{key,value}` outputs.
 */
class LLM {
  sess = undefined;
  profiler = false;
  trace = false;
  feed = {};
  output_tokens = [];
  eos = 2;
  need_position_ids = true;
  stop = false;
  kv_dims = [];
  dtype = "float16";
  num_layers = 0;

  /**
   * Downloads the model files and creates the inference session.
   *
   * @param {object} model - Model descriptor; reads `name`, `path`,
   *   `externaldata` and (optionally) `dtype`.
   * @param {object} options - Reads `provider`, `verbose`, `profiler`, `trace`.
   * @throws {Error} If the requested provider is not available in this browser.
   */
  async load(model, options) {
    const provider = options.provider || "webgpu";
    const verbose = options.verbose;
    this.profiler = options.profiler;
    this.trace = options.trace;

    const modelPath = getModelsPath() + model.path;

    log(`loading... ${model.name}, ${provider}`);
    const json_bytes = await fetchAndCache(modelPath + "/config.json");
    const model_config = JSON.parse(new TextDecoder().decode(json_bytes));

    const model_bytes = await fetchAndCache(modelPath + "/phi3-int4.onnx");
    const externaldata = model.externaldata ? await fetchAndCache(modelPath + '/phi3-int4.data') : false;
    let modelSize = model_bytes.byteLength;
    if (externaldata) {
      modelSize += externaldata.byteLength;
    }

    log(`model size ${Math.round(modelSize / 1024 / 1024)} MB`);

    const opt = {
      executionProviders: [provider],
      preferredOutputLocation: {},
    };

    switch (provider) {
      case "webgpu":
        if (!("gpu" in navigator)) {
          throw new Error("webgpu is NOT supported");
        }
        // Keep the KV-cache outputs as GPU buffers so they can be fed back
        // into the next run as-is.
        for (let i = 0; i < model_config.num_hidden_layers; ++i) {
          opt.preferredOutputLocation[`present.${i}.key`] = 'gpu-buffer';
          opt.preferredOutputLocation[`present.${i}.value`] = 'gpu-buffer';
        }
        break;
      case "webnn":
        if (!("ml" in navigator)) {
          throw new Error("webnn is NOT supported");
        }
        break;
    }

    // `externaldata` is `false` when the model has no external weights file,
    // so test for truthiness rather than `!== undefined`.
    if (externaldata) {
      opt.externalData = [
        {
          data: externaldata,
          // NOTE(review): presumably this must match the external-data file
          // name recorded inside the .onnx file - confirm for phi3-int4.
          path: 'decoder_model_merged.onnx.data'
        },
      ];
    }
    if (verbose) {
      opt.logSeverityLevel = 0;
      opt.logVerbosityLevel = 0;
      ort.env.logLevel = "verbose";
      ort.env.debug = true;
    }

    ort.env.webgpu.profiling = {};
    if (this.profiler) {
      opt.enableProfiling = true;
      ort.env.webgpu.profilingMode = 'default';
      ort.env.webgpu.profiling.mode = 'default';
    }

    this.sess = await ort.InferenceSession.create(model_bytes, opt);

    if (this.trace) {
      ort.env.trace = true;
      // Install a no-op profiling data handler.
      ort.env.webgpu.profiling.ondata = (version, inputsMetadata, outputsMetadata, kernelId, kernelType,
        kernelName, programName, startTime, endTime) => { };
    }

    this.eos = model_config.eos_token_id;
    this.kv_dims = [1, model_config.num_key_value_heads, 0, model_config.hidden_size / model_config.num_attention_heads];
    // Use the descriptor passed in, not the page-level `config` global.
    this.dtype = model.dtype || "float16";
    this.num_layers = model_config.num_hidden_layers;
    this.initilize_feed();
  }

  /**
   * Resets generation state: releases GPU-resident tensors held by the
   * current feed, then installs empty (sequence length 0) past key/value
   * tensors for every layer and clears `output_tokens`.
   */
  initilize_feed() {
    for (const tensor of Object.values(this.feed)) {
      if (tensor?.location === 'gpu-buffer') {
        tensor.dispose();
      }
    }
    this.feed = {};
    const empty = (this.dtype === "float16") ? new Uint16Array() : [];
    for (let i = 0; i < this.num_layers; ++i) {
      this.feed[`past_key_values.${i}.key`] = new ort.Tensor(this.dtype, empty, this.kv_dims);
      this.feed[`past_key_values.${i}.value`] = new ort.Tensor(this.dtype, empty, this.kv_dims);
    }
    this.output_tokens = [];
  }

  /**
   * Returns the index of the largest value in the last row of a rank-3
   * logits tensor (rows of length `dims[2]`, `dims[1]` rows).
   *
   * @param {{data: ArrayLike<number>, dims: number[]}} t - Logits tensor.
   * @returns {number} Index of the maximum within the last row.
   * @throws {Error} If the last row contains a non-finite value.
   */
  argmax(t) {
    const arr = t.data;
    const rowLength = t.dims[2];
    const start = rowLength * (t.dims[1] - 1);
    let max = arr[start];
    let maxidx = 0;

    for (let i = 0; i < rowLength; i++) {
      const val = arr[i + start];
      if (!Number.isFinite(val)) {
        throw new Error("found infinitive in logits");
      }
      if (val > max) {
        max = val;
        maxidx = i;
      }
    }
    return maxidx;
  }

  /**
   * Moves every `present.*` output into the matching `past_key_values.*`
   * feed entry, disposing the GPU buffer it replaces.
   *
   * @param {object} feed - Session inputs, updated in place.
   * @param {object} outputs - Outputs of the last session run.
   */
  update_kv_cache(feed, outputs) {
    for (const name in outputs) {
      if (name.startsWith('present')) {
        const newName = name.replace('present', 'past_key_values');
        // free old gpu buffer
        const t = feed[newName];
        if (t?.location === 'gpu-buffer') {
          t.dispose();
        }
        feed[newName] = outputs[name];
      }
    }
  }

  /** Requests that a running `generate()` stop after its current step. */
  abort() {
    this.stop = true;
  }

  /**
   * Greedily generates tokens until the EOS token is produced, the total
   * sequence length reaches `max_tokens`, or `abort()` is called.
   *
   * @param {Array<number|bigint>} tokens - Prompt token ids.
   * @param {?function(Array<bigint>)} callback - Called after every generated
   *   token with the full token list (skipped while profiling).
   * @param {object} [options] - `keep_cache`: continue from the existing KV
   *   cache and token history instead of starting over; `max_tokens`: limit
   *   on total sequence length (default 256).
   * @returns {Promise<Array<bigint>>} All tokens: history, prompt and output.
   */
  async generate(tokens, callback, options = {}) {
    const keep_cache = options.keep_cache;
    const max_tokens = options.max_tokens || 256;
    this.stop = false;

    // Reset BEFORE capturing the feed, so the object used below is always the
    // one stored on `this` (and therefore the one a later keep_cache call sees).
    if (!keep_cache) {
      this.initilize_feed();
    }
    const feed = this.feed;

    const input_ids = new ort.Tensor('int64', BigInt64Array.from(tokens.map(BigInt)), [1, tokens.length]);
    feed['input_ids'] = input_ids;
    this.output_tokens.push(...input_ids.data);

    let last_token = 0n;
    let seqlen = this.output_tokens.length;
    if (this.need_position_ids) {
      // Positions of the new tokens follow the tokens already in the history.
      const input_len = tokens.length;
      const first_position = seqlen - input_len;
      feed['position_ids'] = new ort.Tensor(
        'int64',
        BigInt64Array.from({ length: input_len }, (_, i) => BigInt(first_position + i)),
        [1, input_len]);
    }
    // NOTE(review): when a previous run ended on EOS, that final token is in
    // output_tokens but was never fed to the model, so with keep_cache the
    // attention mask may be one longer than cache + input - confirm.

    // Loose `!=` is deliberate: last_token is a BigInt, eos comes from JSON.
    while (last_token != this.eos && seqlen < max_tokens && !this.stop) {
      seqlen = this.output_tokens.length;
      feed['attention_mask'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, () => 1n), [1, seqlen]);
      let outputs;
      if (this.trace) {
        console.timeStamp("RUN-BEGIN");
        outputs = await this.sess.run(feed);
        console.timeStamp("RUN-END");
      } else {
        outputs = await this.sess.run(feed);
      }
      last_token = BigInt(this.argmax(outputs.logits));
      this.output_tokens.push(last_token);
      if (callback && !this.profiler) {
        callback(this.output_tokens);
      }
      this.update_kv_cache(feed, outputs);
      feed['input_ids'] = new ort.Tensor('int64', BigInt64Array.from([last_token]), [1, 1]);
      if (this.need_position_ids) {
        feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from([BigInt(seqlen)]), [1, 1]);
      }
    }
    if (this.profiler) {
      this.sess.endProfiling();
    }
    return this.output_tokens;
  }
}
|
255 |
+
|
256 |
+
const config = getConfig();
|
257 |
+
env.localModelPath = 'models';
|
258 |
+
env.allowRemoteModels = config.local == 0;
|
259 |
+
env.allowLocalModels = config.local == 1;
|
260 |
+
ort.env.wasm.numThreads = config.threads;
|
261 |
+
ort.env.wasm.simd = true;
|
262 |
+
|
263 |
+
const cons_log = [];
|
264 |
+
|
265 |
+
// In profiler mode 2, replace console.log so messages are collected in
// cons_log (later offered as a download) instead of being printed.
// Messages containing '_fence_' are dropped.
if (config.profiler === 2) {
  console.log = function (...args) {
    // Stringify every argument so non-string values cannot throw on .includes().
    const message = args.map(String).join(' ');
    if (!message.includes('_fence_')) {
      cons_log.push(message);
    }
  };
}
|
272 |
+
|
273 |
+
const tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
|
274 |
+
|
275 |
+
/**
 * Exposes the captured log lines as a downloadable "profiler.log" link inside
 * the element with id "download". The link is created on first use and only
 * its `href` is refreshed afterwards. Does nothing when there are no lines.
 *
 * @param {string[]} cons_log - Captured log lines.
 */
function create_download_link(cons_log) {
  if (cons_log.length === 0) {
    return;
  }
  const container = document.getElementById('download');
  // Look for an actual anchor rather than whatever node happens to be first.
  let link = container.querySelector('a');
  if (link === null) {
    link = document.createElement("a");
    link.download = "profiler.log";
    link.innerText = "Download";
    container.appendChild(link);
  }
  // btoa() only accepts code points up to 0xFF, so encode the text as UTF-8
  // bytes first; otherwise any non-Latin-1 character in a log line throws.
  const bytes = new TextEncoder().encode(cons_log.join('\n'));
  let binary = '';
  for (const byte of bytes) {
    binary += String.fromCharCode(byte);
  }
  link.href = `data:application/json;base64,${btoa(binary)}`;
}
|
288 |
+
|
289 |
+
/**
 * Returns the bytes at `url`, served from the "onnx" Cache Storage bucket
 * when present and added to it otherwise. If anything in the cache path
 * fails, falls back to a plain network fetch.
 *
 * @param {string} url - Resource to load.
 * @returns {Promise<ArrayBuffer>} The resource bytes.
 * @throws {Error} If the fallback fetch returns a non-success HTTP status.
 */
async function fetchAndCache(url) {
  try {
    const cache = await caches.open("onnx");
    let cachedResponse = await cache.match(url);
    if (cachedResponse == null) {
      await cache.add(url);
      cachedResponse = await cache.match(url);
      log(`${url} (network)`);
    } else {
      log(`${url} (cached)`);
    }
    return await cachedResponse.arrayBuffer();
  } catch (error) {
    log(`${url} (network)`);
    const response = await fetch(url);
    if (!response.ok) {
      // Without this check an HTTP error page would be returned as if it
      // were the requested file.
      throw new Error(`failed to fetch ${url}: ${response.status} ${response.statusText}`, { cause: error });
    }
    return await response.arrayBuffer();
  }
}
|
307 |
+
|
308 |
+
/**
 * Decodes the tokens from `startidx` to the end into text, asking the
 * tokenizer to skip special tokens.
 *
 * @param {object} tokenizer - Object exposing `decode(tokens, options)`.
 * @param {Array} tokens - Token ids.
 * @param {number} startidx - Index of the first token to decode.
 * @returns {*} Whatever `tokenizer.decode` returns.
 */
function token_to_text(tokenizer, tokens, startidx) {
  const tail = tokens.slice(startidx);
  return tokenizer.decode(tail, { skip_special_tokens: true });
}
|
312 |
+
|
313 |
+
const llm = new LLM();
|
314 |
+
|
315 |
+
/**
 * Runs the demo end to end: loads the configured model, generates a reply to
 * a fixed query while streaming the decoded text into the "result" element,
 * then reports timing in the "perf" element (and as a CSV line when
 * `config.csv` is set).
 */
async function main() {
  const model = config.model;

  await llm.load(model, {
    provider: config.provider,
    verbose: config.verbose,
    profiler: config.profiler,
    trace: config.trace,
    local: config.local,
  });

  document.getElementById('status').innerText = "";
  const query = "Tell me about Constantinople.";
  let prompt;

  if (model.name.includes('phi3')) {
    // NOTE(review): the phi3 tokenizer config shipped with this demo defines
    // <|user|>, <|end|> and <|assistant|> tokens; presumably the model's chat
    // template should be used here instead - confirm against the model card.
    prompt = `User:${query}\nAssistant:`;
  } else {
    prompt = `<|system|>\nYou are a friendly assistant.</s>\n<|user|>\n${query}</s>\n<|assistant|>\n`;
  }
  const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });

  const resultElement = document.getElementById('result');
  const start_timer = performance.now();
  const output_tokens = await llm.generate(input_ids, (tokens_so_far) => {
    resultElement.innerText = token_to_text(tokenizer, tokens_so_far, input_ids.length);
  }, { max_tokens: config.max_tokens }); // honor the ?max_tokens= URL option
  const took = (performance.now() - start_timer) / 1000;
  const txt = token_to_text(tokenizer, output_tokens, input_ids.length);
  // NOTE(review): seqlen appears to include the prompt tokens (the text above
  // is decoded starting at input_ids.length) - confirm that is intended for
  // the tokens/sec figure.
  const seqlen = output_tokens.length;
  resultElement.innerText = txt;
  const perf = `${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`;
  console.log(perf + " @@1");
  document.getElementById('perf').innerText = perf;
  if (config.csv) {
    log(`${model.name},${took.toFixed(2)},${(seqlen / took).toFixed(3)},${seqlen},@@2`);
  }
}
|
354 |
+
try {
|
355 |
+
await main();
|
356 |
+
} catch (error) {
|
357 |
+
console.error(error);
|
358 |
+
document.getElementById('result').innerText = error.message;
|
359 |
+
} finally {
|
360 |
+
create_download_link(cons_log);
|
361 |
+
}
|
362 |
+
</script>
|
363 |
+
|
364 |
+
<div id="status"></div>
|
365 |
+
<br />
|
366 |
+
<div id="result"></div>
|
367 |
+
<br />
|
368 |
+
<div id="perf"></div>
|
369 |
+
<br />
|
370 |
+
<div id="download"></div>
|
371 |
+
<br />
|
372 |
+
|
373 |
+
</body>
|
374 |
+
|
375 |
+
</html>
|
demo/ort-phi3/models/phi3-int4/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
demo/ort-phi3/models/phi3-int4/tokenizer_config.json
ADDED
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": true,
|
26 |
+
"single_word": false,
|
27 |
+
"special": false
|
28 |
+
},
|
29 |
+
"32000": {
|
30 |
+
"content": "<|endoftext|>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"32001": {
|
38 |
+
"content": "<|assistant|>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": true,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
},
|
45 |
+
"32002": {
|
46 |
+
"content": "<|step|>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": true,
|
50 |
+
"single_word": false,
|
51 |
+
"special": true
|
52 |
+
},
|
53 |
+
"32003": {
|
54 |
+
"content": "<|function_output|>",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": false,
|
57 |
+
"rstrip": true,
|
58 |
+
"single_word": false,
|
59 |
+
"special": true
|
60 |
+
},
|
61 |
+
"32004": {
|
62 |
+
"content": "<|tag|>",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": false,
|
65 |
+
"rstrip": true,
|
66 |
+
"single_word": false,
|
67 |
+
"special": true
|
68 |
+
},
|
69 |
+
"32005": {
|
70 |
+
"content": "<|function_call|>",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": false,
|
73 |
+
"rstrip": true,
|
74 |
+
"single_word": false,
|
75 |
+
"special": true
|
76 |
+
},
|
77 |
+
"32006": {
|
78 |
+
"content": "<|system|>",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": false,
|
81 |
+
"rstrip": true,
|
82 |
+
"single_word": false,
|
83 |
+
"special": true
|
84 |
+
},
|
85 |
+
"32007": {
|
86 |
+
"content": "<|end|>",
|
87 |
+
"lstrip": false,
|
88 |
+
"normalized": false,
|
89 |
+
"rstrip": true,
|
90 |
+
"single_word": false,
|
91 |
+
"special": true
|
92 |
+
},
|
93 |
+
"32008": {
|
94 |
+
"content": "<|raw|>",
|
95 |
+
"lstrip": false,
|
96 |
+
"normalized": false,
|
97 |
+
"rstrip": true,
|
98 |
+
"single_word": false,
|
99 |
+
"special": true
|
100 |
+
},
|
101 |
+
"32009": {
|
102 |
+
"content": "<|continue|>",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": false,
|
105 |
+
"rstrip": true,
|
106 |
+
"single_word": false,
|
107 |
+
"special": true
|
108 |
+
},
|
109 |
+
"32010": {
|
110 |
+
"content": "<|user|>",
|
111 |
+
"lstrip": false,
|
112 |
+
"normalized": false,
|
113 |
+
"rstrip": true,
|
114 |
+
"single_word": false,
|
115 |
+
"special": true
|
116 |
+
},
|
117 |
+
"32011": {
|
118 |
+
"content": "<|function_list|>",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": false,
|
121 |
+
"rstrip": true,
|
122 |
+
"single_word": false,
|
123 |
+
"special": true
|
124 |
+
},
|
125 |
+
"32012": {
|
126 |
+
"content": "<|calc|>",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": false,
|
129 |
+
"rstrip": true,
|
130 |
+
"single_word": false,
|
131 |
+
"special": true
|
132 |
+
},
|
133 |
+
"32013": {
|
134 |
+
"content": "<|code|>",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": false,
|
137 |
+
"rstrip": true,
|
138 |
+
"single_word": false,
|
139 |
+
"special": true
|
140 |
+
},
|
141 |
+
"32014": {
|
142 |
+
"content": "<|/code|>",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": false,
|
145 |
+
"rstrip": true,
|
146 |
+
"single_word": false,
|
147 |
+
"special": true
|
148 |
+
},
|
149 |
+
"32015": {
|
150 |
+
"content": "<|summary|>",
|
151 |
+
"lstrip": false,
|
152 |
+
"normalized": false,
|
153 |
+
"rstrip": true,
|
154 |
+
"single_word": false,
|
155 |
+
"special": true
|
156 |
+
},
|
157 |
+
"32016": {
|
158 |
+
"content": "<|resource|>",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": false,
|
161 |
+
"rstrip": true,
|
162 |
+
"single_word": false,
|
163 |
+
"special": true
|
164 |
+
},
|
165 |
+
"32017": {
|
166 |
+
"content": "<|assistant_mask|>",
|
167 |
+
"lstrip": false,
|
168 |
+
"normalized": false,
|
169 |
+
"rstrip": true,
|
170 |
+
"single_word": false,
|
171 |
+
"special": true
|
172 |
+
},
|
173 |
+
"32018": {
|
174 |
+
"content": "<|start|>",
|
175 |
+
"lstrip": false,
|
176 |
+
"normalized": false,
|
177 |
+
"rstrip": true,
|
178 |
+
"single_word": false,
|
179 |
+
"special": true
|
180 |
+
},
|
181 |
+
"32019": {
|
182 |
+
"content": "<|message|>",
|
183 |
+
"lstrip": false,
|
184 |
+
"normalized": false,
|
185 |
+
"rstrip": true,
|
186 |
+
"single_word": false,
|
187 |
+
"special": true
|
188 |
+
},
|
189 |
+
"32020": {
|
190 |
+
"content": "<|fim_prefix|>",
|
191 |
+
"lstrip": false,
|
192 |
+
"normalized": false,
|
193 |
+
"rstrip": true,
|
194 |
+
"single_word": false,
|
195 |
+
"special": true
|
196 |
+
},
|
197 |
+
"32021": {
|
198 |
+
"content": "<|fim_middle|>",
|
199 |
+
"lstrip": false,
|
200 |
+
"normalized": false,
|
201 |
+
"rstrip": true,
|
202 |
+
"single_word": false,
|
203 |
+
"special": true
|
204 |
+
},
|
205 |
+
"32022": {
|
206 |
+
"content": "<|fim_suffix|>",
|
207 |
+
"lstrip": false,
|
208 |
+
"normalized": false,
|
209 |
+
"rstrip": true,
|
210 |
+
"single_word": false,
|
211 |
+
"special": true
|
212 |
+
},
|
213 |
+
"32023": {
|
214 |
+
"content": "<|meta_start|>",
|
215 |
+
"lstrip": false,
|
216 |
+
"normalized": false,
|
217 |
+
"rstrip": true,
|
218 |
+
"single_word": false,
|
219 |
+
"special": true
|
220 |
+
},
|
221 |
+
"32024": {
|
222 |
+
"content": "<|ipynb_marker|>",
|
223 |
+
"lstrip": false,
|
224 |
+
"normalized": false,
|
225 |
+
"rstrip": true,
|
226 |
+
"single_word": false,
|
227 |
+
"special": true
|
228 |
+
},
|
229 |
+
"32025": {
|
230 |
+
"content": "<|diff_marker|>",
|
231 |
+
"lstrip": false,
|
232 |
+
"normalized": false,
|
233 |
+
"rstrip": true,
|
234 |
+
"single_word": false,
|
235 |
+
"special": true
|
236 |
+
},
|
237 |
+
"32026": {
|
238 |
+
"content": "<|ghissue|>",
|
239 |
+
"lstrip": false,
|
240 |
+
"normalized": false,
|
241 |
+
"rstrip": true,
|
242 |
+
"single_word": false,
|
243 |
+
"special": true
|
244 |
+
},
|
245 |
+
"32027": {
|
246 |
+
"content": "<|ghreview|>",
|
247 |
+
"lstrip": false,
|
248 |
+
"normalized": false,
|
249 |
+
"rstrip": true,
|
250 |
+
"single_word": false,
|
251 |
+
"special": true
|
252 |
+
},
|
253 |
+
"32028": {
|
254 |
+
"content": "<|disc_start|>",
|
255 |
+
"lstrip": false,
|
256 |
+
"normalized": false,
|
257 |
+
"rstrip": true,
|
258 |
+
"single_word": false,
|
259 |
+
"special": true
|
260 |
+
},
|
261 |
+
"32029": {
|
262 |
+
"content": "<|disc_sep|>",
|
263 |
+
"lstrip": false,
|
264 |
+
"normalized": false,
|
265 |
+
"rstrip": true,
|
266 |
+
"single_word": false,
|
267 |
+
"special": true
|
268 |
+
},
|
269 |
+
"32030": {
|
270 |
+
"content": "<|disc_thread|><|query|>",
|
271 |
+
"lstrip": false,
|
272 |
+
"normalized": false,
|
273 |
+
"rstrip": true,
|
274 |
+
"single_word": false,
|
275 |
+
"special": true
|
276 |
+
},
|
277 |
+
"32031": {
|
278 |
+
"content": "<|/query|>",
|
279 |
+
"lstrip": false,
|
280 |
+
"normalized": false,
|
281 |
+
"rstrip": true,
|
282 |
+
"single_word": false,
|
283 |
+
"special": true
|
284 |
+
},
|
285 |
+
"32032": {
|
286 |
+
"content": "<|data|>",
|
287 |
+
"lstrip": false,
|
288 |
+
"normalized": false,
|
289 |
+
"rstrip": true,
|
290 |
+
"single_word": false,
|
291 |
+
"special": true
|
292 |
+
},
|
293 |
+
"32033": {
|
294 |
+
"content": "<|/data|>",
|
295 |
+
"lstrip": false,
|
296 |
+
"normalized": false,
|
297 |
+
"rstrip": true,
|
298 |
+
"single_word": false,
|
299 |
+
"special": true
|
300 |
+
},
|
301 |
+
"32034": {
|
302 |
+
"content": "<|sys|>",
|
303 |
+
"lstrip": false,
|
304 |
+
"normalized": false,
|
305 |
+
"rstrip": true,
|
306 |
+
"single_word": false,
|
307 |
+
"special": true
|
308 |
+
},
|
309 |
+
"32035": {
|
310 |
+
"content": "<|/sys|>",
|
311 |
+
"lstrip": false,
|
312 |
+
"normalized": false,
|
313 |
+
"rstrip": true,
|
314 |
+
"single_word": false,
|
315 |
+
"special": true
|
316 |
+
},
|
317 |
+
"32036": {
|
318 |
+
"content": "<|inst|>",
|
319 |
+
"lstrip": false,
|
320 |
+
"normalized": false,
|
321 |
+
"rstrip": true,
|
322 |
+
"single_word": false,
|
323 |
+
"special": true
|
324 |
+
},
|
325 |
+
"32037": {
|
326 |
+
"content": "<|/inst|>",
|
327 |
+
"lstrip": false,
|
328 |
+
"normalized": false,
|
329 |
+
"rstrip": true,
|
330 |
+
"single_word": false,
|
331 |
+
"special": true
|
332 |
+
}
|
333 |
+
},
|
334 |
+
"additional_special_tokens": [
|
335 |
+
"<|/inst|>"
|
336 |
+
],
|
337 |
+
"bos_token": "<s>",
|
338 |
+
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
|
339 |
+
"clean_up_tokenization_spaces": false,
|
340 |
+
"eos_token": "<|endoftext|>",
|
341 |
+
"legacy": false,
|
342 |
+
"model_max_length": 4096,
|
343 |
+
"pad_token": "<|endoftext|>",
|
344 |
+
"padding_side": "left",
|
345 |
+
"sp_model_kwargs": {},
|
346 |
+
"tokenizer_class": "LlamaTokenizer",
|
347 |
+
"unk_token": "<unk>",
|
348 |
+
"use_default_system_prompt": false
|
349 |
+
}
|
main.js
CHANGED
@@ -49,11 +49,9 @@ function createElem(tag, attrs = {}, children = []) {
|
|
49 |
return elem;
|
50 |
}
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
const pageCategories = [
|
55 |
{
|
56 |
-
|
57 |
description: `ONNX Runtime`,
|
58 |
demos: {
|
59 |
'ort-phi2': {
|
@@ -61,6 +59,11 @@ const pageCategories = [
|
|
61 |
description: `phi2 from Microsoft`,
|
62 |
filename: "ort-phi2",
|
63 |
},
|
|
|
|
|
|
|
|
|
|
|
64 |
'ort-sam': {
|
65 |
name: 'Segment Anything',
|
66 |
description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
|
@@ -84,7 +87,7 @@ const pageCategories = [
|
|
84 |
},
|
85 |
},
|
86 |
{
|
87 |
-
|
88 |
description: `TFLite`,
|
89 |
demos: {
|
90 |
'tflite-gemma': {
|
@@ -95,7 +98,7 @@ const pageCategories = [
|
|
95 |
},
|
96 |
},
|
97 |
{
|
98 |
-
|
99 |
description: 'Transformers.js',
|
100 |
demos: {
|
101 |
benchmark: {
|
@@ -131,7 +134,7 @@ const pageCategories = [
|
|
131 |
},
|
132 |
},
|
133 |
{
|
134 |
-
|
135 |
description: 'TVM',
|
136 |
demos: {
|
137 |
sd: {
|
@@ -32475,7 +32478,7 @@ function setSampleIFrameURL(e, demoInfo) {
|
|
32475 |
// from those keys to each demo.
|
32476 |
const samplesByKey = new Map();
|
32477 |
// Generate the list of demos
|
32478 |
-
for (const {
|
32479 |
for (const [key, demoInfo] of Object.entries(demos)) {
|
32480 |
samplesByKey.set(key, demoInfo);
|
32481 |
}
|
@@ -32484,7 +32487,7 @@ for (const { title, description, demos } of pageCategories) {
|
|
32484 |
createElem('div', { className: 'sampleCategory' }, [
|
32485 |
createElem('h3', {
|
32486 |
style: { 'margin-top': '5px' },
|
32487 |
-
textContent:
|
32488 |
dataset: { tooltip: description },
|
32489 |
}),
|
32490 |
]),
|
|
|
49 |
return elem;
|
50 |
}
|
51 |
|
52 |
+
const demoCategories = [
|
|
|
|
|
53 |
{
|
54 |
+
name: `ONNX Runtime`,
|
55 |
description: `ONNX Runtime`,
|
56 |
demos: {
|
57 |
'ort-phi2': {
|
|
|
59 |
description: `phi2 from Microsoft`,
|
60 |
filename: "ort-phi2",
|
61 |
},
|
62 |
+
'ort-phi3': {
|
63 |
+
name: 'phi3',
|
64 |
+
description: `phi3 from Microsoft`,
|
65 |
+
filename: "ort-phi3",
|
66 |
+
},
|
67 |
'ort-sam': {
|
68 |
name: 'Segment Anything',
|
69 |
description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
|
|
|
87 |
},
|
88 |
},
|
89 |
{
|
90 |
+
name: `TFLite`,
|
91 |
description: `TFLite`,
|
92 |
demos: {
|
93 |
'tflite-gemma': {
|
|
|
98 |
},
|
99 |
},
|
100 |
{
|
101 |
+
name: 'Transformers.js',
|
102 |
description: 'Transformers.js',
|
103 |
demos: {
|
104 |
benchmark: {
|
|
|
134 |
},
|
135 |
},
|
136 |
{
|
137 |
+
name: 'TVM',
|
138 |
description: 'TVM',
|
139 |
demos: {
|
140 |
sd: {
|
|
|
32478 |
// from those keys to each demo.
|
32479 |
const samplesByKey = new Map();
|
32480 |
// Generate the list of demos
|
32481 |
+
for (const { name, description, demos } of demoCategories) {
|
32482 |
for (const [key, demoInfo] of Object.entries(demos)) {
|
32483 |
samplesByKey.set(key, demoInfo);
|
32484 |
}
|
|
|
32487 |
createElem('div', { className: 'sampleCategory' }, [
|
32488 |
createElem('h3', {
|
32489 |
style: { 'margin-top': '5px' },
|
32490 |
+
textContent: name,
|
32491 |
dataset: { tooltip: description },
|
32492 |
}),
|
32493 |
]),
|