fx
Browse files- index.html +2 -2
index.html
CHANGED
@@ -160,7 +160,7 @@
|
|
160 |
// Keep RAM low for mobile: small context + FP16 V‑cache (WASM safe)
|
161 |
const LOAD_CONFIG = {
|
162 |
n_ctx: 768,
|
163 |
-
n_batch:
|
164 |
cache_type_k: "q4_0", // int4 K cache: reduces RAM without flash_attn
|
165 |
cache_type_v: "f16", // IMPORTANT: V cache quant requires flash_attn; not available in WASM
|
166 |
flash_attn: false,
|
@@ -267,7 +267,7 @@
|
|
267 |
$input.value = '';
|
268 |
|
269 |
const assistantBubble = ui.add('assistant', '');
|
270 |
-
truncateHistoryForMobile(
|
271 |
|
272 |
$send.disabled = true; $stop.disabled = true; // Stop stays disabled for now; it is re-enabled (disabled = false) once the stream starts
|
273 |
aborter = new AbortController();
|
|
|
160 |
// Keep RAM low for mobile: small context + FP16 V‑cache (WASM safe)
|
161 |
const LOAD_CONFIG = {
|
162 |
n_ctx: 768,
|
163 |
+
n_batch: 128, // must be >= 64 to satisfy GGML_KQ_MASK_PAD and avoid batch overflow in wasm
|
164 |
cache_type_k: "q4_0", // int4 K cache: reduces RAM without flash_attn
|
165 |
cache_type_v: "f16", // IMPORTANT: V cache quant requires flash_attn; not available in WASM
|
166 |
flash_attn: false,
|
|
|
267 |
$input.value = '';
|
268 |
|
269 |
const assistantBubble = ui.add('assistant', '');
|
270 |
+
truncateHistoryForMobile(600); // trim harder to reduce initial prompt size vs 768 ctx
|
271 |
|
272 |
$send.disabled = true; $stop.disabled = true; // Stop stays disabled for now; it is re-enabled (disabled = false) once the stream starts
|
273 |
aborter = new AbortController();
|