Fraser committed on
Commit
ca9e25c
·
1 Parent(s): f975dda
Files changed (1) hide show
  1. index.html +71 -77
index.html CHANGED
@@ -1,4 +1,5 @@
1
  <!doctype html>
 
2
  <html lang="en">
3
  <head>
4
  <meta charset="utf-8" />
@@ -48,73 +49,78 @@
48
 
49
  <main>
50
  <div class="bar">
51
- <label>Model:</label>
52
- $1 <!-- NEW: Gemma-3-270M from ggml-org (public GGUF) -->
53
- <option value='{"id":"ggml-org/gemma-3-270m-GGUF","file":"gemma-3-270m-Q8_0.gguf","label":"Gemma‑3‑270M Q8_0 (≈292 MB)"}'>Gemma‑3‑270M Q8_0 (≈292 MB)</option>
 
 
54
  <!-- Smallest RAM / fastest (good for phones) -->
55
  <option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q3_K_S.gguf","label":"OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)"}'>OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)</option>
56
  <option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q4_K_M.gguf","label":"OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)"}'>OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)</option>
57
 
58
- <!-- Good quality while still small -->
59
- <option value='{"id":"mav23/SmolLM-135M-Instruct-GGUF","file":"smollm-135m-instruct.Q3_K_S.gguf","label":"SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)"}'>SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)</option>
60
- <option value='{"id":"QuantFactory/SmolLM-360M-Instruct-GGUF","file":"SmolLM-360M-Instruct.Q3_K_S.gguf","label":"SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)"}'>SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)</option>
61
-
62
- <!-- Stronger tiny model (bigger, still phone‑possible on high‑end) -->
63
- <option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q3_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q3_K_M (≈432 MB)"}'>Qwen2.50.5B‑Instruct Q3_K_M (≈432 MB)</option>
64
- <option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q4_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)"}'>Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)</option>
65
-
66
- <!-- Optional: bigger but better; may be too heavy for some phones -->
67
- <option value='{"id":"TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF","file":"tinyllama-1.1b-chat-v1.0.Q3_K_S.gguf","label":"TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)"}'>TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)</option>
68
-
69
- <!-- Your original SmolLM2 360M options (kept) -->
70
- <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q4_0.gguf","label":"SmolLM2‑360M Q4_0 (≈229 MB)"}'>SmolLM2360M Q4_0 (≈229 MB)</option>
71
- <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q3_K_S.gguf","label":"SmolLM2‑360M Q3_K_S (≈219 MB, faster)"}'>SmolLM2‑360M Q3_K_S (≈219 MB, faster)</option>
72
- <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q2_K.gguf","label":"SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)"}'>SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)</option>
73
-
74
- <!-- Custom (use for Gemma‑3‑270M when a public GGUF exists) -->
75
- <option value='{"custom":true,"label":"Custom HF GGUF (e.g., Gemma‑3‑270M)"}'>Custom HF GGUF (e.g., Gemma‑3‑270M)</option>
76
- </select>
77
-
78
- <details id="customBox">
79
- <summary class="tiny">Custom GGUF (paste HF repo + file)</summary>
80
- <div class="row">
81
- <label class="tiny">HF repo id</label>
82
- <input id="customRepo" type="text" placeholder="e.g. google/gemma-3-270m-GGUF (when available)" style="width:280px" />
83
- <label class="tiny">file</label>
84
- <input id="customFile" type="text" placeholder="e.g. gemma-3-270m.Q4_0.gguf" style="width:240px" />
85
- </div>
86
- <div class="note">Note: official <a href="https://huggingface.co/google/gemma-3-270m" target="_blank" rel="noreferrer">Gemma‑3‑270M</a> is the base HF repo. A ready‑to‑use public GGUF is now available at <a href="https://huggingface.co/ggml-org/gemma-3-270m-GGUF" target="_blank" rel="noreferrer">ggml‑org/gemma‑3‑270m‑GGUF</a> (currently providing <code>gemma-3-270m-Q8_0.gguf</code> ≈292 MB). For maximum speed on low‑RAM phones, the OpenELM‑270M‑Instruct Q3_K_S option above is even lighter, but Gemma‑3‑270M offers strong quality for its size.</div>
87
- </details>
88
-
89
- <div class="row">
90
- <label>Max new tokens</label>
91
- <input id="nPredict" type="number" min="1" max="512" step="1" value="128" />
92
- </div>
93
- <div class="row">
94
- <label>Temp</label><input id="temp" type="number" min="0" max="2" step="0.1" value="0.7" style="width:80px" />
95
- <label>Top‑p</label><input id="topp" type="number" min="0" max="1" step="0.05" value="0.9" style="width:80px" />
96
- <label>Top‑k</label><input id="topk" type="number" min="1" max="100" step="1" value="40" style="width:80px" />
97
- </div>
98
-
99
- <div class="spacer"></div>
100
-
101
- <button id="loadBtn" class="primary">Load model</button>
102
- <button id="unloadBtn" class="ghost" disabled>Unload</button>
103
-
104
- <div class="progress" title="download progress"><i id="prog"></i></div>
105
- <div id="stats">idle</div>
106
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
- <div id="chat" aria-live="polite"></div>
109
-
110
- <form class="inputbar" id="form">
111
- <textarea id="input" placeholder="Ask me anything…" required></textarea>
112
- <div class="row" style="flex-direction:column; gap:6px; align-items:flex-end">
113
- <button id="sendBtn" class="primary">Send</button>
114
- <button id="stopBtn" type="button" class="ghost" disabled>Stop</button>
115
- <div class="tiny">Context kept small for mobile perf</div>
116
- </div>
117
- </form>
118
  </main>
119
 
120
  <script type="module">
@@ -319,22 +325,10 @@
319
 
320
  <!--
321
  What changed:
322
- • Added multiple small, publicly downloadable GGUFs with CORS‑friendly repos:
323
- - OpenELM‑270M‑Instruct (Q3_K_S / Q4_K_M) ~134‑175 MB
324
- - SmolLM‑135M‑Instruct (Q3_K_S) ~88 MB
325
- - SmolLM‑360M‑Instruct (Q3_K_S) — ~219 MB
326
- - Qwen2.5‑0.5B‑Instruct (Q3_K_M / Q4_K_M) — ~432/491 MB
327
- - TinyLlama‑1.1B‑Chat (Q3_K_S) — ~500 MB (optional, heavier but strong)
328
- - Kept your SmolLM2‑360M options
329
- • Added a "Custom HF GGUF" path so you can paste a Gemma‑3‑270M GGUF when a public mirror appears. The official repo is gated and not GGUF; direct download from the gated repo is unlikely to work in‑browser.
330
- • Kept V‑cache at f16 and disabled flash_attn to avoid the llama.cpp error "V cache quantization requires flash_attn" in WASM.
331
- • Modest n_ctx (768) and n_batch (48) to keep RAM low on phones.
332
- • Pinned @wllama/wllama to 2.3.1 and referenced explicit .wasm URLs (no +esm).
333
-
334
- Tips:
335
- • For fastest sampling on low‑RAM phones: prefer Q3_K_S quants (OpenELM‑270M‑Instruct Q3_K_S or SmolLM‑135M‑Instruct Q3_K_S).
336
- • For a nice quality bump still under ~500 MB: Qwen2.5‑0.5B‑Instruct Q3_K_M.
337
- • Serve with COOP/COEP headers if you want multi‑threading.
338
  -->
 
339
  </body>
340
  </html>
 
1
  <!doctype html>
2
+
3
  <html lang="en">
4
  <head>
5
  <meta charset="utf-8" />
 
49
 
50
  <main>
51
  <div class="bar">
52
+ <label for="model">Model:</label>
53
+ <!-- FIX: add the missing <select id="model"> and remove stray text node "$1" -->
54
+ <select id="model" class="grow">
55
+ <!-- NEW: Gemma-3-270M from ggml-org (public GGUF) -->
56
+ <option selected value='{"id":"ggml-org/gemma-3-270m-GGUF","file":"gemma-3-270m-Q8_0.gguf","label":"Gemma‑3‑270M Q8_0 (≈292 MB)"}'>Gemma‑3‑270M Q8_0 (≈292 MB)</option>
57
  <!-- Smallest RAM / fastest (good for phones) -->
58
  <option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q3_K_S.gguf","label":"OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)"}'>OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)</option>
59
  <option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q4_K_M.gguf","label":"OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)"}'>OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)</option>
60
 
61
+ ```
62
+ <!-- Good quality while still small -->
63
+ <option value='{"id":"mav23/SmolLM-135M-Instruct-GGUF","file":"smollm-135m-instruct.Q3_K_S.gguf","label":"SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)"}'>SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)</option>
64
+ <option value='{"id":"QuantFactory/SmolLM-360M-Instruct-GGUF","file":"SmolLM-360M-Instruct.Q3_K_S.gguf","label":"SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)"}'>SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)</option>
65
+
66
+ <!-- Stronger tiny model (bigger, still phone‑possible on high‑end) -->
67
+ <option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q3_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q3_K_M (≈432 MB)"}'>Qwen2.5‑0.5B‑Instruct Q3_K_M (≈432 MB)</option>
68
+ <option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q4_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)"}'>Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)</option>
69
+
70
+ <!-- Optional: bigger but better; may be too heavy for some phones -->
71
+ <option value='{"id":"TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF","file":"tinyllama-1.1b-chat-v1.0.Q3_K_S.gguf","label":"TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)"}'>TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)</option>
72
+
73
+ <!-- Your original SmolLM2 360M options (kept) -->
74
+ <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q4_0.gguf","label":"SmolLM2‑360M Q4_0 (≈229 MB)"}'>SmolLM2‑360M Q4_0 (≈229 MB)</option>
75
+ <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q3_K_S.gguf","label":"SmolLM2‑360M Q3_K_S (≈219 MB, faster)"}'>SmolLM2‑360M Q3_K_S (≈219 MB, faster)</option>
76
+ <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q2_K.gguf","label":"SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)"}'>SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)</option>
77
+
78
+ <!-- Custom (use for Gemma‑3‑270M when a public GGUF exists) -->
79
+ <option value='{"custom":true,"label":"Custom HF GGUF (e.g., Gemma‑3‑270M)"}'>Custom HF GGUF (e.g., Gemma‑3‑270M)</option>
80
+ </select>
81
+
82
+ <details id="customBox">
83
+ <summary class="tiny">Custom GGUF (paste HF repo + file)</summary>
84
+ <div class="row">
85
+ <label class="tiny">HF repo id</label>
86
+ <input id="customRepo" type="text" placeholder="e.g. google/gemma-3-270m-GGUF (when available)" style="width:280px" />
87
+ <label class="tiny">file</label>
88
+ <input id="customFile" type="text" placeholder="e.g. gemma-3-270m.Q4_0.gguf" style="width:240px" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  </div>
90
+ <div class="note">Note: official <a href="https://huggingface.co/google/gemma-3-270m" target="_blank" rel="noreferrer">Gemma‑3‑270M</a> is the base HF repo. A ready‑to‑use public GGUF is now available at <a href="https://huggingface.co/ggml-org/gemma-3-270m-GGUF" target="_blank" rel="noreferrer">ggml‑org/gemma‑3‑270m‑GGUF</a> (currently providing <code>gemma-3-270m-Q8_0.gguf</code> ≈292 MB). For maximum speed on low‑RAM phones, the OpenELM‑270M‑Instruct Q3_K_S option above is even lighter, but Gemma‑3‑270M offers strong quality for its size.</div>
91
+ </details>
92
+
93
+ <div class="row">
94
+ <label>Max new tokens</label>
95
+ <input id="nPredict" type="number" min="1" max="512" step="1" value="128" />
96
+ </div>
97
+ <div class="row">
98
+ <label>Temp</label><input id="temp" type="number" min="0" max="2" step="0.1" value="0.7" style="width:80px" />
99
+ <label>Top‑p</label><input id="topp" type="number" min="0" max="1" step="0.05" value="0.9" style="width:80px" />
100
+ <label>Top‑k</label><input id="topk" type="number" min="1" max="100" step="1" value="40" style="width:80px" />
101
+ </div>
102
+
103
+ <div class="spacer"></div>
104
+
105
+ <button id="loadBtn" class="primary">Load model</button>
106
+ <button id="unloadBtn" class="ghost" disabled>Unload</button>
107
+
108
+ <div class="progress" title="download progress"><i id="prog"></i></div>
109
+ <div id="stats">idle</div>
110
+ </div>
111
+
112
+ <div id="chat" aria-live="polite"></div>
113
+
114
+ <form class="inputbar" id="form">
115
+ <textarea id="input" placeholder="Ask me anything…" required></textarea>
116
+ <div class="row" style="flex-direction:column; gap:6px; align-items:flex-end">
117
+ <button id="sendBtn" class="primary">Send</button>
118
+ <button id="stopBtn" type="button" class="ghost" disabled>Stop</button>
119
+ <div class="tiny">Context kept small for mobile perf</div>
120
+ </div>
121
+ </form>
122
+ ```
123
 
 
 
 
 
 
 
 
 
 
 
124
  </main>
125
 
126
  <script type="module">
 
325
 
326
  <!--
327
  What changed:
328
+ FIXED: the crash was caused by a missing <select id="model"> (document.getElementById('model') returned null). Added the select and removed a stray "$1" text node.
329
+ • Added explicit label-for association (for accessibility) and set the first option as selected.
330
+ Kept all other logic identical.
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  -->
332
+
333
  </body>
334
  </html>