Fraser committed on
Commit
ca9e25c
·
1 Parent(s): f975dda
Files changed (1) hide show
  1. index.html +71 -77
index.html CHANGED
@@ -1,4 +1,5 @@
1
  <!doctype html>
 
2
  <html lang="en">
3
  <head>
4
  <meta charset="utf-8" />
@@ -48,73 +49,78 @@
48
 
49
  <main>
50
  <div class="bar">
51
- <label>Model:</label>
52
- $1 <!-- NEW: Gemma-3-270M from ggml-org (public GGUF) -->
53
- <option value='{"id":"ggml-org/gemma-3-270m-GGUF","file":"gemma-3-270m-Q8_0.gguf","label":"Gemma‑3‑270M Q8_0 (≈292 MB)"}'>Gemma‑3‑270M Q8_0 (≈292 MB)</option>
 
 
54
  <!-- Smallest RAM / fastest (good for phones) -->
55
  <option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q3_K_S.gguf","label":"OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)"}'>OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)</option>
56
  <option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q4_K_M.gguf","label":"OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)"}'>OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)</option>
57
 
58
- <!-- Good quality while still small -->
59
- <option value='{"id":"mav23/SmolLM-135M-Instruct-GGUF","file":"smollm-135m-instruct.Q3_K_S.gguf","label":"SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)"}'>SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)</option>
60
- <option value='{"id":"QuantFactory/SmolLM-360M-Instruct-GGUF","file":"SmolLM-360M-Instruct.Q3_K_S.gguf","label":"SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)"}'>SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)</option>
61
-
62
- <!-- Stronger tiny model (bigger, still phone‑possible on high‑end) -->
63
- <option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q3_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q3_K_M (≈432 MB)"}'>Qwen2.50.5B‑Instruct Q3_K_M (≈432 MB)</option>
64
- <option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q4_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)"}'>Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)</option>
65
-
66
- <!-- Optional: bigger but better; may be too heavy for some phones -->
67
- <option value='{"id":"TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF","file":"tinyllama-1.1b-chat-v1.0.Q3_K_S.gguf","label":"TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)"}'>TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)</option>
68
-
69
- <!-- Your original SmolLM2 360M options (kept) -->
70
- <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q4_0.gguf","label":"SmolLM2‑360M Q4_0 (≈229 MB)"}'>SmolLM2360M Q4_0 (≈229 MB)</option>
71
- <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q3_K_S.gguf","label":"SmolLM2‑360M Q3_K_S (≈219 MB, faster)"}'>SmolLM2‑360M Q3_K_S (≈219 MB, faster)</option>
72
- <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q2_K.gguf","label":"SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)"}'>SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)</option>
73
-
74
- <!-- Custom (use for Gemma‑3‑270M when a public GGUF exists) -->
75
- <option value='{"custom":true,"label":"Custom HF GGUF (e.g., Gemma‑3‑270M)"}'>Custom HF GGUF (e.g., Gemma‑3‑270M)</option>
76
- </select>
77
-
78
- <details id="customBox">
79
- <summary class="tiny">Custom GGUF (paste HF repo + file)</summary>
80
- <div class="row">
81
- <label class="tiny">HF repo id</label>
82
- <input id="customRepo" type="text" placeholder="e.g. google/gemma-3-270m-GGUF (when available)" style="width:280px" />
83
- <label class="tiny">file</label>
84
- <input id="customFile" type="text" placeholder="e.g. gemma-3-270m.Q4_0.gguf" style="width:240px" />
85
- </div>
86
- <div class="note">Note: official <a href="https://huggingface.co/google/gemma-3-270m" target="_blank" rel="noreferrer">Gemma‑3‑270M</a> is the base HF repo. A ready‑to‑use public GGUF is now available at <a href="https://huggingface.co/ggml-org/gemma-3-270m-GGUF" target="_blank" rel="noreferrer">ggml‑org/gemma‑3‑270m‑GGUF</a> (currently providing <code>gemma-3-270m-Q8_0.gguf</code> ≈292 MB). For maximum speed on low‑RAM phones, the OpenELM‑270M‑Instruct Q3_K_S option above is even lighter, but Gemma‑3‑270M offers strong quality for its size.</div>
87
- </details>
88
-
89
- <div class="row">
90
- <label>Max new tokens</label>
91
- <input id="nPredict" type="number" min="1" max="512" step="1" value="128" />
92
- </div>
93
- <div class="row">
94
- <label>Temp</label><input id="temp" type="number" min="0" max="2" step="0.1" value="0.7" style="width:80px" />
95
- <label>Top‑p</label><input id="topp" type="number" min="0" max="1" step="0.05" value="0.9" style="width:80px" />
96
- <label>Top‑k</label><input id="topk" type="number" min="1" max="100" step="1" value="40" style="width:80px" />
97
- </div>
98
-
99
- <div class="spacer"></div>
100
-
101
- <button id="loadBtn" class="primary">Load model</button>
102
- <button id="unloadBtn" class="ghost" disabled>Unload</button>
103
-
104
- <div class="progress" title="download progress"><i id="prog"></i></div>
105
- <div id="stats">idle</div>
106
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
- <div id="chat" aria-live="polite"></div>
109
-
110
- <form class="inputbar" id="form">
111
- <textarea id="input" placeholder="Ask me anything…" required></textarea>
112
- <div class="row" style="flex-direction:column; gap:6px; align-items:flex-end">
113
- <button id="sendBtn" class="primary">Send</button>
114
- <button id="stopBtn" type="button" class="ghost" disabled>Stop</button>
115
- <div class="tiny">Context kept small for mobile perf</div>
116
- </div>
117
- </form>
118
  </main>
119
 
120
  <script type="module">
@@ -319,22 +325,10 @@
319
 
320
  <!--
321
  What changed:
322
- • Added multiple small, publicly downloadable GGUFs with CORS‑friendly repos:
323
- - OpenELM‑270M‑Instruct (Q3_K_S / Q4_K_M) ~134‑175 MB
324
- - SmolLM‑135M‑Instruct (Q3_K_S) ~88 MB
325
- - SmolLM‑360M‑Instruct (Q3_K_S) — ~219 MB
326
- - Qwen2.5‑0.5B‑Instruct (Q3_K_M / Q4_K_M) — ~432/491 MB
327
- - TinyLlama‑1.1B‑Chat (Q3_K_S) — ~500 MB (optional, heavier but strong)
328
- - Kept your SmolLM2‑360M options
329
- • Added a "Custom HF GGUF" path so you can paste a Gemma‑3‑270M GGUF when a public mirror appears. The official repo is gated and not GGUF; direct download from the gated repo is unlikely to work in‑browser.
330
- • Kept V‑cache at f16 and disabled flash_attn to avoid the llama.cpp error "V cache quantization requires flash_attn" in WASM.
331
- • Modest n_ctx (768) and n_batch (48) to keep RAM low on phones.
332
- • Pinned @wllama/wllama to 2.3.1 and referenced explicit .wasm URLs (no +esm).
333
-
334
- Tips:
335
- • For fastest sampling on low‑RAM phones: prefer Q3_K_S quants (OpenELM‑270M‑Instruct Q3_K_S or SmolLM‑135M‑Instruct Q3_K_S).
336
- • For a nice quality bump still under ~500 MB: Qwen2.5‑0.5B‑Instruct Q3_K_M.
337
- • Serve with COOP/COEP headers if you want multi‑threading.
338
  -->
 
339
  </body>
340
  </html>
 
1
  <!doctype html>
2
+
3
  <html lang="en">
4
  <head>
5
  <meta charset="utf-8" />
 
49
 
50
  <main>
51
  <div class="bar">
52
+ <label for="model">Model:</label>
53
+ <!-- FIX: add the missing <select id="model"> and remove stray text node "$1" -->
54
+ <select id="model" class="grow">
55
+ <!-- NEW: Gemma-3-270M from ggml-org (public GGUF) -->
56
+ <option selected value='{"id":"ggml-org/gemma-3-270m-GGUF","file":"gemma-3-270m-Q8_0.gguf","label":"Gemma‑3‑270M Q8_0 (≈292 MB)"}'>Gemma‑3‑270M Q8_0 (≈292 MB)</option>
57
  <!-- Smallest RAM / fastest (good for phones) -->
58
  <option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q3_K_S.gguf","label":"OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)"}'>OpenELM‑270M‑Instruct Q3_K_S (≈134 MB)</option>
59
  <option value='{"id":"mradermacher/OpenELM-270M-Instruct-GGUF","file":"OpenELM-270M-Instruct.Q4_K_M.gguf","label":"OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)"}'>OpenELM‑270M‑Instruct Q4_K_M (≈175 MB)</option>
60
 
61
+ ```
62
+ <!-- Good quality while still small -->
63
+ <option value='{"id":"mav23/SmolLM-135M-Instruct-GGUF","file":"smollm-135m-instruct.Q3_K_S.gguf","label":"SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)"}'>SmolLM‑135M‑Instruct Q3_K_S (≈88 MB)</option>
64
+ <option value='{"id":"QuantFactory/SmolLM-360M-Instruct-GGUF","file":"SmolLM-360M-Instruct.Q3_K_S.gguf","label":"SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)"}'>SmolLM‑360M‑Instruct Q3_K_S (≈219 MB)</option>
65
+
66
+ <!-- Stronger tiny model (bigger, still phone‑possible on high‑end) -->
67
+ <option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q3_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q3_K_M (≈432 MB)"}'>Qwen2.5‑0.5B‑Instruct Q3_K_M (≈432 MB)</option>
68
+ <option value='{"id":"Qwen/Qwen2.5-0.5B-Instruct-GGUF","file":"qwen2.5-0.5b-instruct-q4_k_m.gguf","label":"Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)"}'>Qwen2.5‑0.5B‑Instruct Q4_K_M (≈491 MB)</option>
69
+
70
+ <!-- Optional: bigger but better; may be too heavy for some phones -->
71
+ <option value='{"id":"TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF","file":"tinyllama-1.1b-chat-v1.0.Q3_K_S.gguf","label":"TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)"}'>TinyLlama‑1.1B‑Chat Q3_K_S (≈500 MB)</option>
72
+
73
+ <!-- Your original SmolLM2 360M options (kept) -->
74
+ <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q4_0.gguf","label":"SmolLM2‑360M Q4_0 (≈229 MB)"}'>SmolLM2‑360M Q4_0 (≈229 MB)</option>
75
+ <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q3_K_S.gguf","label":"SmolLM2‑360M Q3_K_S (≈219 MB, faster)"}'>SmolLM2‑360M Q3_K_S (≈219 MB, faster)</option>
76
+ <option value='{"id":"QuantFactory/SmolLM-360M-GGUF","file":"SmolLM-360M.Q2_K.gguf","label":"SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)"}'>SmolLM2‑360M Q2_K (≈200 MB, min RAM / quality drop)</option>
77
+
78
+ <!-- Custom (use for Gemma‑3‑270M when a public GGUF exists) -->
79
+ <option value='{"custom":true,"label":"Custom HF GGUF (e.g., Gemma‑3‑270M)"}'>Custom HF GGUF (e.g., Gemma‑3‑270M)</option>
80
+ </select>
81
+
82
+ <details id="customBox">
83
+ <summary class="tiny">Custom GGUF (paste HF repo + file)</summary>
84
+ <div class="row">
85
+ <label class="tiny">HF repo id</label>
86
+ <input id="customRepo" type="text" placeholder="e.g. google/gemma-3-270m-GGUF (when available)" style="width:280px" />
87
+ <label class="tiny">file</label>
88
+ <input id="customFile" type="text" placeholder="e.g. gemma-3-270m.Q4_0.gguf" style="width:240px" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  </div>
90
+ <div class="note">Note: official <a href="https://huggingface.co/google/gemma-3-270m" target="_blank" rel="noreferrer">Gemma‑3‑270M</a> is the base HF repo. A ready‑to‑use public GGUF is now available at <a href="https://huggingface.co/ggml-org/gemma-3-270m-GGUF" target="_blank" rel="noreferrer">ggml‑org/gemma‑3‑270m‑GGUF</a> (currently providing <code>gemma-3-270m-Q8_0.gguf</code> ≈292 MB). For maximum speed on low‑RAM phones, the OpenELM‑270M‑Instruct Q3_K_S option above is even lighter, but Gemma‑3‑270M offers strong quality for its size.</div>
91
+ </details>
92
+
93
+ <div class="row">
94
+ <label>Max new tokens</label>
95
+ <input id="nPredict" type="number" min="1" max="512" step="1" value="128" />
96
+ </div>
97
+ <div class="row">
98
+ <label>Temp</label><input id="temp" type="number" min="0" max="2" step="0.1" value="0.7" style="width:80px" />
99
+ <label>Top‑p</label><input id="topp" type="number" min="0" max="1" step="0.05" value="0.9" style="width:80px" />
100
+ <label>Top‑k</label><input id="topk" type="number" min="1" max="100" step="1" value="40" style="width:80px" />
101
+ </div>
102
+
103
+ <div class="spacer"></div>
104
+
105
+ <button id="loadBtn" class="primary">Load model</button>
106
+ <button id="unloadBtn" class="ghost" disabled>Unload</button>
107
+
108
+ <div class="progress" title="download progress"><i id="prog"></i></div>
109
+ <div id="stats">idle</div>
110
+ </div>
111
+
112
+ <div id="chat" aria-live="polite"></div>
113
+
114
+ <form class="inputbar" id="form">
115
+ <textarea id="input" placeholder="Ask me anything…" required></textarea>
116
+ <div class="row" style="flex-direction:column; gap:6px; align-items:flex-end">
117
+ <button id="sendBtn" class="primary">Send</button>
118
+ <button id="stopBtn" type="button" class="ghost" disabled>Stop</button>
119
+ <div class="tiny">Context kept small for mobile perf</div>
120
+ </div>
121
+ </form>
122
+ ```
123
 
 
 
 
 
 
 
 
 
 
 
124
  </main>
125
 
126
  <script type="module">
 
325
 
326
  <!--
327
  What changed:
328
+ FIXED: the crash was caused by a missing <select id="model"> (document.getElementById('model') returned null). Added the select and removed a stray "$1" text node.
329
+ • Added explicit label-for association (for accessibility) and set the first option as selected.
330
+ Kept all other logic identical.
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  -->
332
+
333
  </body>
334
  </html>