sitammeur committed on
Commit
2b86e0c
·
verified ·
1 Parent(s): 50dea7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -77,8 +77,10 @@ def respond(
77
  model_path=f"models/{model}",
78
  flash_attn=False,
79
  n_gpu_layers=0,
80
- n_batch=32,
81
- n_ctx=8192,
 
 
82
  )
83
  llm_model = model
84
  provider = LlamaCppPythonProvider(llm)
@@ -158,9 +160,9 @@ demo = gr.ChatInterface(
158
  ),
159
  gr.Slider(
160
  minimum=512,
161
- maximum=4096,
162
- value=2048,
163
- step=512,
164
  label="Max Tokens",
165
  info="Maximum length of response (higher = longer replies)",
166
  ),
 
77
  model_path=f"models/{model}",
78
  flash_attn=False,
79
  n_gpu_layers=0,
80
+ n_batch=16,
81
+ n_ctx=2048,
82
+ n_threads=2,
83
+ use_mmap=True
84
  )
85
  llm_model = model
86
  provider = LlamaCppPythonProvider(llm)
 
160
  ),
161
  gr.Slider(
162
  minimum=512,
163
+ maximum=2048,
164
+ value=1024,
165
+ step=1,
166
  label="Max Tokens",
167
  info="Maximum length of response (higher = longer replies)",
168
  ),