Yang Gu committed on
Commit
afed82d
·
1 Parent(s): b02752e

Add llm based on MediaPipe and TFLite

Browse files
README.md CHANGED
@@ -7,4 +7,3 @@ sdk: static
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
7
  pinned: false
8
  ---
9
 
 
index.html CHANGED
@@ -1,19 +1,36 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width" />
  <title>WebGPU AI Demos</title>
</head>
<body>
  <h1 align="center">WebGPU AI Demos</h1>
  <script src="main.js"></script>
  <script>
    "use strict";
    // Each entry: [display name, relative demo link, description HTML].
    const demos = [[
      "LLMs with MediaPipe and TFLite",
      "llm-inference",
      '<a href="https://github.com/googlesamples/mediapipe/tree/main/examples/llm_inference/js">original code</a>, ' +
      '<a href="https://developers.googleblog.com/2024/03/running-large-language-models-on-device-with-mediapipe-andtensorflow-lite.html">more info</a>',
    ]];

    // Build the demo index table.
    const table = document.createElement("table");
    table.align = "center";
    table.style.width = "80%";
    table.setAttribute("border", "1");
    document.body.appendChild(table);

    // Header row.
    let row = table.insertRow(-1);
    row.style.fontWeight = "bold";
    for (const header of ["Name", "Description"]) {
      // Plain strings: textContent avoids unnecessary HTML parsing.
      row.insertCell(-1).textContent = header;
    }

    // One row per demo.
    for (const [name, link, description] of demos) {
      row = table.insertRow(-1);
      row.insertCell(-1).innerHTML = `<a href="${link}">${name}</a>`;
      row.insertCell(-1).innerHTML = description;
    }
  </script>
</body>
</html>
llm-inference/gemma-2b-it-gpu-int4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef44d548e44a2a6f313c3f3e94a48e1de786871ad95f4cd81bfb35372032cdbd
3
+ size 1354301440
llm-inference/index.html ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Copyright 2024 The MediaPipe Authors.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License. -->
14
+
15
+ <!DOCTYPE html>
16
+ <html lang="en">
17
+ <head>
18
+ <title>LLM Inference Web Demo</title>
19
+ </head>
20
+ <body>
21
+ <div style="text-align: center">
22
+ Input<br />
23
+ <textarea id="input" style="height: 100px; width: 80%;"></textarea><br />
24
+ <input type="button" id="submit" value="Get Response" disabled /><br />
25
+ <text id="status"></text><br />
26
+ <br />
27
+ Result<br />
28
+ <textarea id="output" style="height: 800px; width: 80%;"></textarea>
29
+ <script type="module" src="index.js"></script>
30
+ </div>
31
+ </body>
32
+ </html>
llm-inference/index.js ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright 2024 The MediaPipe Authors.
2
+
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ // ---------------------------------------------------------------------------------------- //
16
+
17
+ import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';
18
+
19
+ const input = document.getElementById('input');
20
+ const output = document.getElementById('output');
21
+ const submit = document.getElementById('submit');
22
+ const status = document.getElementById('status');
23
+
24
+ const modelFileName = 'gemma-2b-it-gpu-int4.bin'; /* Update the file name */
25
+ let startTime;
26
+
27
/**
 * Appends newly generated partial results to the output text box and, once
 * generation completes, re-enables the submit button and reports
 * word-count/throughput statistics in the status line.
 *
 * @param {string} partialResults - Newly generated text fragment.
 * @param {boolean} complete - True when generation has finished.
 */
function displayPartialResults(partialResults, complete) {
  output.textContent += partialResults;

  if (complete) {
    if (!output.textContent) {
      output.textContent = 'Result is empty';
    }
    submit.disabled = false;

    // Split on whitespace runs so empty/blank output counts as zero words
    // (a bare split(' ') reports 1 word for the empty string).
    const wordCount =
        output.textContent.trim().split(/\s+/).filter(Boolean).length;
    // Math.round() takes a single argument; the original passed a second
    // argument expecting 2-decimal rounding, which was silently ignored.
    // Scale-round instead, and guard against division by zero.
    const seconds = (performance.now() - startTime) / 1000;
    const roundedSeconds = Math.round(seconds * 100) / 100;
    const wordsPerSecond =
        seconds > 0 ? Math.round((wordCount / seconds) * 100) / 100 : 0;
    status.innerHTML =
        `${wordCount} words in ${roundedSeconds} seconds, ${wordsPerSecond} words per second`;
  }
}
45
+
46
// Load the model file, using the Origin Private File System as a cache:
// serve from OPFS when present, otherwise download and persist it.
async function getModelOPFS(name, url, updateModel) {
  const opfsRoot = await navigator.storage.getDirectory();

  // Download the model and write it into OPFS for future loads.
  const downloadAndCache = async () => {
    const response = await fetch(url);
    const data = await readResponse(response);
    const handle = await opfsRoot.getFileHandle(name, {create: true});
    const writable = await handle.createWritable();
    await writable.write(data);
    await writable.close();
    return data;
  };

  // Caller explicitly asked for a fresh copy.
  if (updateModel) {
    return await downloadAndCache();
  }

  try {
    const handle = await opfsRoot.getFileHandle(name);
    const file = await handle.getFile();
    return await file.arrayBuffer();
  } catch (e) {
    // Cache miss (file handle not found) — fetch and store it.
    return await downloadAndCache();
  }
}
73
+
74
/**
 * Reads a fetch Response body into a single Uint8Array.
 *
 * The Content-Length header is used as the initial allocation hint; if the
 * server streams more bytes than advertised (or no header is present), the
 * buffer is grown to fit. The returned array's length is the larger of the
 * advertised size and the number of bytes actually received.
 *
 * @param {Response} response - A fetch Response with a readable body.
 * @returns {Promise<Uint8Array>} The accumulated body bytes.
 */
async function readResponse(response) {
  const contentLength = response.headers.get('Content-Length');
  // Always pass a radix to parseInt.
  let total = parseInt(contentLength ?? '0', 10);
  let buffer = new Uint8Array(total);
  let loaded = 0;

  const reader = response.body.getReader();
  // Iterative read loop (the original used recursion, which is harder to
  // follow and gains nothing here).
  for (;;) {
    const {done, value} = await reader.read();
    if (done) break;

    const newLoaded = loaded + value.length;
    if (newLoaded > total) {
      // Size hint was too small (or absent) — grow to fit.
      total = newLoaded;
      const grown = new Uint8Array(total);
      grown.set(buffer);
      buffer = grown;
    }
    buffer.set(value, loaded);
    loaded = newLoaded;
  }

  return buffer;
}
100
+
101
/**
 * Main entry point for LLM inference: fetches the MediaPipe GenAI wasm
 * fileset, loads the model (cached via OPFS), wires up the submit button,
 * and initializes the LlmInference task.
 */
async function runDemo() {
  const genaiFileset = await FilesetResolver.forGenAiTasks(
      'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');
  let llmInference;
  // The model file is served next to the page; getModelOPFS caches it in
  // the Origin Private File System so reloads skip the 1.3 GB download.
  const modelBuffer =
      new Int8Array(await getModelOPFS(modelFileName, modelFileName, false));

  submit.onclick = () => {
    startTime = performance.now();
    output.textContent = '';
    status.innerHTML = '';
    // Disabled until this generation completes (re-enabled by
    // displayPartialResults when `complete` is true).
    submit.disabled = true;
    llmInference.generateResponse(input.value, displayPartialResults);
  };

  submit.value = 'Loading the model...';
  LlmInference
      .createFromModelBuffer(genaiFileset, modelBuffer)
      .then(llm => {
        llmInference = llm;
        submit.disabled = false;
        submit.value = 'Get Response';
      })
      .catch(err => {
        // Log the underlying failure instead of swallowing it — the alert
        // alone gives the user (and developer) nothing to act on.
        console.error('LlmInference initialization failed:', err);
        alert('Failed to initialize the task.');
      });
}

runDemo();
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }