Spaces:

pdufour
/

Qwen2-VL-2B-Instruct-ONNX-Q4-F16

Running

pdufour committed on Nov 19, 2024

Commit

ee6199f

verified ·

1 Parent(s): f9cf2b4

Update index.js

Files changed (1) hide show

index.js CHANGED Viewed

@@ -12,6 +12,17 @@ const BASE_MODEL = "Qwen/Qwen2-VL-2B-Instruct";
 const ONNX_MODEL = "pdufour/Qwen2-VL-2B-Instruct-ONNX-Q4-F16";
 const QUANT = "q4f16";
 const MAX_SINGLE_CHAT_LENGTH = 10;
 // UI Elements
 const exampleButton = document.getElementById('example');
@@ -35,17 +46,17 @@ async function initializeSessions() {
   ortSessionA = await ort.InferenceSession.create(
     await getModelFile(ONNX_MODEL, `onnx/QwenVL_A_${QUANT}.onnx`),
-    { executionProviders: ["webgpu"] }
   );
   ortSessionB = await ort.InferenceSession.create(
     await getModelFile(ONNX_MODEL, `onnx/QwenVL_B_${QUANT}.onnx`),
-    { executionProviders: ["webgpu"] }
   );
   ortSessionC = await ort.InferenceSession.create(
     await getModelFile(ONNX_MODEL, `onnx/QwenVL_C_${QUANT}.onnx`),
-    { executionProviders: ["webgpu"] }
   );
   config = (await getModelJSON(BASE_MODEL, "config.json"));
@@ -243,9 +254,7 @@ export async function imageTextToText(
     ortSessionD = await ort.InferenceSession.create(
       await getModelFile(ONNX_MODEL, `onnx/QwenVL_D_${QUANT}.onnx`),
-      {
-        executionProviders: ["webgpu"],
-      }
     );
     ({ hidden_states, position_ids } = await ortSessionD.run({
@@ -271,9 +280,7 @@ export async function imageTextToText(
     if (!ortSessionE) {
       ortSessionE = await ort.InferenceSession.create(
         await getModelFile(ONNX_MODEL, `onnx/QwenVL_E_${QUANT}.onnx`),
-        {
-          executionProviders: ["webgpu"],
-        },
       );
     }

 const ONNX_MODEL = "pdufour/Qwen2-VL-2B-Instruct-ONNX-Q4-F16";
 const QUANT = "q4f16";
 const MAX_SINGLE_CHAT_LENGTH = 10;
+const DEFAULT_SESSION_OPTIONS = { // Shared options object passed as the second argument to each ort.InferenceSession.create call shown in this diff (sessions A-E); replaces the inline { executionProviders: ["webgpu"] } literals.
+  executionProviders: ["webgpu"], // Same provider list the removed inline literals used.
+  logSeverityLevel: 0, // NOTE(review): presumably the most verbose severity level — confirm against the ONNX Runtime docs.
+  logVerbosityLevel: 1,
+  enableProfiling: true, // NOTE(review): profiling is turned on for every session — confirm this is intended outside of debugging.
+  enableCpuMemArena: true,
+  graphOptimizationLevel: "all",
+  executionMode: "sequential",
+  intraOpNumThreads: 0, // NOTE(review): 0 presumably means "let the runtime choose" — TODO confirm.
+  interOpNumThreads: 0, // NOTE(review): same assumption as intraOpNumThreads — TODO confirm.
+}
 // UI Elements
 const exampleButton = document.getElementById('example');
   ortSessionA = await ort.InferenceSession.create(
     await getModelFile(ONNX_MODEL, `onnx/QwenVL_A_${QUANT}.onnx`),
+    DEFAULT_SESSION_OPTIONS,
   );
   ortSessionB = await ort.InferenceSession.create(
     await getModelFile(ONNX_MODEL, `onnx/QwenVL_B_${QUANT}.onnx`),
+    DEFAULT_SESSION_OPTIONS,
   );
   ortSessionC = await ort.InferenceSession.create(
     await getModelFile(ONNX_MODEL, `onnx/QwenVL_C_${QUANT}.onnx`),
+    DEFAULT_SESSION_OPTIONS,
   );
   config = (await getModelJSON(BASE_MODEL, "config.json"));
     ortSessionD = await ort.InferenceSession.create(
       await getModelFile(ONNX_MODEL, `onnx/QwenVL_D_${QUANT}.onnx`),
+      DEFAULT_SESSION_OPTIONS,
     );
     ({ hidden_states, position_ids } = await ortSessionD.run({
     if (!ortSessionE) {
       ortSessionE = await ort.InferenceSession.create(
         await getModelFile(ONNX_MODEL, `onnx/QwenVL_E_${QUANT}.onnx`),
+        DEFAULT_SESSION_OPTIONS,
       );
     }