Fraser committed on
Commit
b39563c
·
1 Parent(s): ca9e25c
Files changed (1) hide show
  1. index.html +2 -2
index.html CHANGED
@@ -160,7 +160,7 @@
160
  // Keep RAM low for mobile: small context + FP16 V‑cache (WASM safe)
161
  const LOAD_CONFIG = {
162
  n_ctx: 768,
163
- n_batch: 48,
164
  cache_type_k: "q4_0", // int4 K cache: reduces RAM without flash_attn
165
  cache_type_v: "f16", // IMPORTANT: V cache quant requires flash_attn; not available in WASM
166
  flash_attn: false,
@@ -267,7 +267,7 @@
267
  $input.value = '';
268
 
269
  const assistantBubble = ui.add('assistant', '');
270
- truncateHistoryForMobile(900);
271
 
272
  $send.disabled = true; $stop.disabled = true; // will flip to true once stream starts
273
  aborter = new AbortController();
 
160
  // Keep RAM low for mobile: small context + FP16 V‑cache (WASM safe)
161
  const LOAD_CONFIG = {
162
  n_ctx: 768,
163
+ n_batch: 128, // must be >= 64 to satisfy GGML_KQ_MASK_PAD and avoid batch overflow in wasm
164
  cache_type_k: "q4_0", // int4 K cache: reduces RAM without flash_attn
165
  cache_type_v: "f16", // IMPORTANT: V cache quant requires flash_attn; not available in WASM
166
  flash_attn: false,
 
267
  $input.value = '';
268
 
269
  const assistantBubble = ui.add('assistant', '');
270
+ truncateHistoryForMobile(600); // trim harder to reduce initial prompt size vs 768 ctx
271
 
272
  $send.disabled = true; $stop.disabled = true; // will flip to true once stream starts
273
  aborter = new AbortController();