Yang Gu committed · Commit afed82d · Parent(s): b02752e

Add llm based on MediaPipe and TFLite

Files changed:
- README.md +0 -1
- index.html +36 -19
- llm-inference/gemma-2b-it-gpu-int4.bin +3 -0
- llm-inference/index.html +32 -0
- llm-inference/index.js +130 -0
- style.css +0 -28
README.md
CHANGED
@@ -7,4 +7,3 @@ sdk: static
 pinned: false
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html
CHANGED
@@ -1,19 +1,36 @@
-
-<
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+<title>WebGPU AI Demos</title>
+<body>
+<h1 align="center">WebGPU AI Demos</h1>
+<script src="main.js"></script>
+<script>
+"use strict";
+const demos = [["LLMs with MediaPipe and TFLite", "llm-inference", "<a href=https://github.com/googlesamples/mediapipe/tree/main/examples/llm_inference/js>original code</a>, <a href=https://developers.googleblog.com/2024/03/running-large-language-models-on-device-with-mediapipe-andtensorflow-lite.html>more info</a>"]];
+
+// table
+const table = document.createElement("table");
+table.align = "center";
+table.style.width = "80%";
+table.setAttribute("border", "1");
+document.body.appendChild(table);
+
+// first line
+let row = table.insertRow(-1);
+const headers = ["Name", "Description"];
+row.style.fontWeight = "bold";
+for (let header of headers) {
+  let td = row.insertCell(-1);
+  td.innerHTML = header;
+}
+
+// rest of lines
+for (let demo of demos) {
+  row = table.insertRow(-1);
+  let td = row.insertCell(-1);
+  td.innerHTML = `<a href=${demo[1]}>${demo[0]}</a>`;
+  td = row.insertCell(-1);
+  td.innerHTML = demo[2];
+}
+
+
+</script>
+</body>
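The `demos` array above drives the whole landing page: each entry is a [name, relative path, description HTML] triple that becomes one table row, so registering another demo is a one-line change. A minimal sketch of what that would look like (the entry below is hypothetical, for illustration only):

// Hypothetical entry; the name, path, and link are placeholders.
demos.push([
  "Another WebGPU demo",                        // "Name" column, used as the link text
  "another-demo",                               // relative path the name links to
  "<a href=https://example.com>more info</a>"   // raw HTML for the "Description" column
]);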
llm-inference/gemma-2b-it-gpu-int4.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef44d548e44a2a6f313c3f3e94a48e1de786871ad95f4cd81bfb35372032cdbd
+size 1354301440
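The three added lines are not the model itself but a Git LFS pointer file: the ~1.35 GB weights live in LFS storage, and the repo tracks only the spec version, the SHA-256 object id, and the byte size. A sketch, assuming `buffer` is an ArrayBuffer holding the fully downloaded model (for example, the return value of `getModelOPFS` in llm-inference/index.js below), of checking the bytes against this pointer with the standard Web Crypto API:

// Sketch: verify downloaded model bytes against the LFS pointer above.
// Assumes `buffer` already holds the complete file contents.
async function verifyModelOid(buffer) {
  const expected = 'ef44d548e44a2a6f313c3f3e94a48e1de786871ad95f4cd81bfb35372032cdbd';
  const digest = await crypto.subtle.digest('SHA-256', buffer);  // Web Crypto API
  const hex = Array.from(new Uint8Array(digest))
      .map(b => b.toString(16).padStart(2, '0'))
      .join('');
  return hex === expected && buffer.byteLength === 1354301440;
}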
llm-inference/index.html
ADDED
@@ -0,0 +1,32 @@
+<!-- Copyright 2024 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. -->
+
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <title>LLM Inference Web Demo</title>
+  </head>
+  <body>
+    <div style="text-align: center">
+      Input<br />
+      <textarea id="input" style="height: 100px; width: 80%;"></textarea><br />
+      <input type="button" id="submit" value="Get Response" disabled /><br />
+      <text id="status"></text><br />
+      <br />
+      Result<br />
+      <textarea id="output" style="height: 800px; width: 80%;"></textarea>
+      <script type="module" src="index.js"></script>
+    </div>
+  </body>
+</html>
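Two details worth flagging in this page for anyone adapting it: the four ids (input, submit, status, output) are the contract that llm-inference/index.js relies on via getElementById, and <text> is not a standard HTML element. Browsers treat it as an unknown inline element, which is why it still works as a label holder here; a <span id="status"> would be the conventional choice.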
llm-inference/index.js
ADDED
@@ -0,0 +1,130 @@
+// Copyright 2024 The MediaPipe Authors.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ---------------------------------------------------------------------------------------- //
+
+import {FilesetResolver, LlmInference} from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai';
+
+const input = document.getElementById('input');
+const output = document.getElementById('output');
+const submit = document.getElementById('submit');
+const status = document.getElementById('status');
+
+const modelFileName = 'gemma-2b-it-gpu-int4.bin'; /* Update the file name */
+let startTime;
+
+/**
+ * Display newly generated partial results to the output text box.
+ */
+function displayPartialResults(partialResults, complete) {
+  output.textContent += partialResults;
+
+  if (complete) {
+    if (!output.textContent) {
+      output.textContent = 'Result is empty';
+    }
+    submit.disabled = false;
+
+    const wordCount = output.textContent.split(' ').length;
+    const seconds = Math.round((performance.now() - startTime) / 1000, 2);
+    const wordCountPerSecond = Math.round(wordCount / seconds, 2);
+    status.innerHTML = `${wordCount} words in ${seconds} seconds, ${wordCountPerSecond} words per second`;
+  }
+}
+
+// Get model via Origin Private File System
+async function getModelOPFS(name, url, updateModel) {
+  const root = await navigator.storage.getDirectory();
+  let fileHandle;
+
+  async function updateFile() {
+    const response = await fetch(url);
+    const buffer = await readResponse(response);
+    fileHandle = await root.getFileHandle(name, {create: true});
+    const writable = await fileHandle.createWritable();
+    await writable.write(buffer);
+    await writable.close();
+    return buffer;
+  }
+
+  if (updateModel) {
+    return await updateFile();
+  }
+
+  try {
+    fileHandle = await root.getFileHandle(name);
+    const blob = await fileHandle.getFile();
+    return await blob.arrayBuffer();
+  } catch (e) {
+    return await updateFile();
+  }
+}
+
+async function readResponse(response) {
+  const contentLength = response.headers.get('Content-Length');
+  let total = parseInt(contentLength ?? '0');
+  let buffer = new Uint8Array(total);
+  let loaded = 0;
+
+  const reader = response.body.getReader();
+  async function read() {
+    const {done, value} = await reader.read();
+    if (done) return;
+
+    let newLoaded = loaded + value.length;
+    if (newLoaded > total) {
+      total = newLoaded;
+      let newBuffer = new Uint8Array(total);
+      newBuffer.set(buffer);
+      buffer = newBuffer;
+    }
+    buffer.set(value, loaded);
+    loaded = newLoaded;
+    return read();
+  }
+
+  await read();
+  return buffer;
+}
+
+/**
+ * Main function to run LLM Inference.
+ */
+async function runDemo() {
+  const genaiFileset = await FilesetResolver.forGenAiTasks(
+      'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai/wasm');
+  let llmInference;
+  const modelBuffer = new Int8Array(await getModelOPFS(modelFileName, modelFileName, false));
+
+  submit.onclick = () => {
+    startTime = performance.now();
+    output.textContent = '';
+    status.innerHTML = '';
+    submit.disabled = true;
+    llmInference.generateResponse(input.value, displayPartialResults);
+  };
+
+  submit.value = 'Loading the model...'
+  LlmInference
+      .createFromModelBuffer(genaiFileset, modelBuffer)
+      .then(llm => {
+        llmInference = llm;
+        submit.disabled = false;
+        submit.value = 'Get Response'
+      }).catch(() => {
+        alert('Failed to initialize the task.');
+      });
+}
+
+runDemo();
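One behavior of this script that is easy to miss: getModelOPFS caches the model in the browser's Origin Private File System, so only the first visit pays for the 1.35 GB download; runDemo passes updateModel = false, and later reloads read the cached file. A sketch, assuming a page wants to manage that cache explicitly, using only standard OPFS calls:

// Sketch: explicit cache management for the OPFS-stored model.
// Force a re-download (the third argument of getModelOPFS is updateModel):
//   const fresh = await getModelOPFS(modelFileName, modelFileName, true);

// Or drop the cached copy so the next page load fetches it again.
async function clearCachedModel(name) {
  const root = await navigator.storage.getDirectory();  // OPFS root directory
  try {
    await root.removeEntry(name);  // FileSystemDirectoryHandle.removeEntry()
  } catch (e) {
    // A NotFoundError here just means nothing was cached yet.
  }
}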
style.css
DELETED
@@ -1,28 +0,0 @@
-body {
-  padding: 2rem;
-  font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
-}
-
-h1 {
-  font-size: 16px;
-  margin-top: 0;
-}
-
-p {
-  color: rgb(107, 114, 128);
-  font-size: 15px;
-  margin-bottom: 10px;
-  margin-top: 5px;
-}
-
-.card {
-  max-width: 620px;
-  margin: 0 auto;
-  padding: 16px;
-  border: 1px solid lightgray;
-  border-radius: 16px;
-}
-
-.card p:last-child {
-  margin-bottom: 0;
-}