Yang Gu committed · Commit afed82d · Parent(s): b02752e

Add llm based on MediaPipe and TFLite

Files changed:
- README.md +0 -1
- index.html +36 -19
- llm-inference/gemma-2b-it-gpu-int4.bin +3 -0
- llm-inference/index.html +32 -0
- llm-inference/index.js +130 -0
- style.css +0 -28
README.md
CHANGED
@@ -7,4 +7,3 @@ sdk: static
 pinned: false
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html
CHANGED
@@ -1,19 +1,36 @@
-
-<
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+<title>WebGPU AI Demos</title>
+<body>
+<h1 align="center">WebGPU AI Demos</h1>
+<script src="main.js"></script>
+<script>
+"use strict";
+const demos = [["LLMs with MediaPipe and TFLite", "llm-inference", "<a href=https://github.com/googlesamples/mediapipe/tree/main/examples/llm_inference/js>original code</a>, <a href=https://developers.googleblog.com/2024/03/running-large-language-models-on-device-with-mediapipe-andtensorflow-lite.html>more info</a>"]];
+
+// table
+const table = document.createElement("table");
+table.align = "center";
+table.style.width = "80%";
+table.setAttribute("border", "1");
+document.body.appendChild(table);
+
+// first line
+let row = table.insertRow(-1);
+const headers = ["Name", "Description"];
+row.style.fontWeight = "bold";
+for (let header of headers) {
+  let td = row.insertCell(-1);
+  td.innerHTML = header;
+}
+
+// rest of lines
+for (let demo of demos) {
+  row = table.insertRow(-1);
+  let td = row.insertCell(-1);
+  td.innerHTML = `<a href=${demo[1]}>${demo[0]}</a>`;
+  td = row.insertCell(-1);
+  td.innerHTML = demo[2];
+}
+
+
+</script>
+</body>
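The `demos` array above drives the whole landing page: each entry is a [name, relative path, description HTML] triple that becomes one table row, so registering another demo is a one-line change. A minimal sketch of what that would look like (the entry below is hypothetical, for illustration only):

// Hypothetical entry; the name, path, and link are placeholders.
demos.push([
  "Another WebGPU demo",                        // "Name" column, used as the link text
  "another-demo",                               // relative path the name links to
  "<a href=https://example.com>more info</a>"   // raw HTML for the "Description" column
]);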
llm-inference/gemma-2b-it-gpu-int4.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef44d548e44a2a6f313c3f3e94a48e1de786871ad95f4cd81bfb35372032cdbd
+size 1354301440
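The three added lines are not the model itself but a Git LFS pointer file: the ~1.35 GB weights live in LFS storage, and the repo tracks only the spec version, the SHA-256 object id, and the byte size. A sketch, assuming `buffer` is an ArrayBuffer holding the fully downloaded model (for example, the return value of `getModelOPFS` in llm-inference/index.js below), of checking the bytes against this pointer with the standard Web Crypto API:

// Sketch: verify downloaded model bytes against the LFS pointer above.
// Assumes `buffer` already holds the complete file contents.
async function verifyModelOid(buffer) {
  const expected = 'ef44d548e44a2a6f313c3f3e94a48e1de786871ad95f4cd81bfb35372032cdbd';
  const digest = await crypto.subtle.digest('SHA-256', buffer);  // Web Crypto API
  const hex = Array.from(new Uint8Array(digest))
      .map(b => b.toString(16).padStart(2, '0'))
      .join('');
  return hex === expected && buffer.byteLength === 1354301440;
}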
llm-inference/index.html
ADDED
@@ -0,0 +1,32 @@
+<!-- Copyright 2024 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. -->
+
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <title>LLM Inference Web Demo</title>
+  </head>
+  <body>
+    <div style="text-align: center">
+      Input<br />
+      <textarea id="input" style="height: 100px; width: 80%;"></textarea><br />
+      <input type="button" id="submit" value="Get Response" disabled /><br />
+      <text id="status"></text><br />
+      <br />
+      Result<br />
+      <textarea id="output" style="height: 800px; width: 80%;"></textarea>
+      <script type="module" src="index.js"></script>
+    </div>
+  </body>
+</html>
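Two details worth flagging in this page for anyone adapting it: the four ids (input, submit, status, output) are the contract that llm-inference/index.js relies on via getElementById, and <text> is not a standard HTML element. Browsers treat it as an unknown inline element, which is why it still works as a label holder here; a <span id="status"> would be the conventional choice.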
llm-inference/index.js
ADDED
@@ -0,0 +1,130 @@
+// Copyright 2024 The MediaPipe Authors.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ---------------------------------------------------------------------------------------- //
+
+import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';
+
+const input = document.getElementById('input');
+const output = document.getElementById('output');
+const submit = document.getElementById('submit');
+const status = document.getElementById('status');
+
+const modelFileName = 'gemma-2b-it-gpu-int4.bin'; /* Update the file name */
+let startTime;
+
+/**
+ * Display newly generated partial results to the output text box.
+ */
+function displayPartialResults(partialResults, complete) {
+  output.textContent += partialResults;
+
+  if (complete) {
+    if (!output.textContent) {
+      output.textContent = 'Result is empty';
+    }
+    submit.disabled = false;
+
+    const wordCount = output.textContent.split(' ').length;
+    const seconds = Math.round((performance.now() - startTime) / 1000, 2);
+    const wordCountPerSecond = Math.round(wordCount / seconds, 2);
+    status.innerHTML = `${wordCount} words in ${seconds} seconds, ${wordCountPerSecond} words per second`;
+  }
+}
+
+// Get model via Origin Private File System
+async function getModelOPFS(name, url, updateModel) {
+  const root = await navigator.storage.getDirectory();
+  let fileHandle;
+
+  async function updateFile() {
+    const response = await fetch(url);
+    const buffer = await readResponse(response);
+    fileHandle = await root.getFileHandle(name, {create: true});
+    const writable = await fileHandle.createWritable();
+    await writable.write(buffer);
+    await writable.close();
+    return buffer;
+  }
+
+  if (updateModel) {
+    return await updateFile();
+  }
+
+  try {
+    fileHandle = await root.getFileHandle(name);
+    const blob = await fileHandle.getFile();
+    return await blob.arrayBuffer();
+  } catch (e) {
+    return await updateFile();
+  }
+}
+
+async function readResponse(response) {
+  const contentLength = response.headers.get('Content-Length');
+  let total = parseInt(contentLength ?? '0');
+  let buffer = new Uint8Array(total);
+  let loaded = 0;
+
+  const reader = response.body.getReader();
+  async function read() {
+    const {done, value} = await reader.read();
+    if (done) return;
+
+    let newLoaded = loaded + value.length;
+    if (newLoaded > total) {
+      total = newLoaded;
+      let newBuffer = new Uint8Array(total);
+      newBuffer.set(buffer);
+      buffer = newBuffer;
+    }
+    buffer.set(value, loaded);
+    loaded = newLoaded;
+    return read();
+  }
+
+  await read();
+  return buffer;
+}
+
+/**
+ * Main function to run LLM Inference.
+ */
+async function runDemo() {
+  const genaiFileset = await FilesetResolver.forGenAiTasks(
+      'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');
+  let llmInference;
+  const modelBuffer = new Int8Array(await getModelOPFS(modelFileName, modelFileName, false));
+
+  submit.onclick = () => {
+    startTime = performance.now();
+    output.textContent = '';
+    status.innerHTML = '';
+    submit.disabled = true;
+    llmInference.generateResponse(input.value, displayPartialResults);
+  };
+
+  submit.value = 'Loading the model...'
+  LlmInference
+      .createFromModelBuffer(genaiFileset, modelBuffer)
+      .then(llm => {
+        llmInference = llm;
+        submit.disabled = false;
+        submit.value = 'Get Response'
+      }).catch(() => {
+        alert('Failed to initialize the task.');
+      });
+}
+
+runDemo();
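One behavior of this script that is easy to miss: getModelOPFS caches the model in the browser's Origin Private File System, so only the first visit pays for the 1.35 GB download; runDemo passes updateModel = false, and later reloads read the cached file. A sketch, assuming a page wants to manage that cache explicitly, using only standard OPFS calls:

// Sketch: explicit cache management for the OPFS-stored model.
// Force a re-download (the third argument of getModelOPFS is updateModel):
//   const fresh = await getModelOPFS(modelFileName, modelFileName, true);

// Or drop the cached copy so the next page load fetches it again.
async function clearCachedModel(name) {
  const root = await navigator.storage.getDirectory();  // OPFS root directory
  try {
    await root.removeEntry(name);  // FileSystemDirectoryHandle.removeEntry()
  } catch (e) {
    // A NotFoundError here just means nothing was cached yet.
  }
}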
style.css
DELETED
@@ -1,28 +0,0 @@
-body {
-  padding: 2rem;
-  font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
-}
-
-h1 {
-  font-size: 16px;
-  margin-top: 0;
-}
-
-p {
-  color: rgb(107, 114, 128);
-  font-size: 15px;
-  margin-bottom: 10px;
-  margin-top: 5px;
-}
-
-.card {
-  max-width: 620px;
-  margin: 0 auto;
-  padding: 16px;
-  border: 1px solid lightgray;
-  border-radius: 16px;
-}
-
-.card p:last-child {
-  margin-bottom: 0;
-}