Update app.py
app.py CHANGED

@@ -12,16 +12,9 @@ model_path = hf_hub_download(
 
 llm = Llama(
     model_path=model_path,
-
-    #n_threads=8,
+    n_ctx=2048,
     chat_format="llama-3",
-    #
-    #f16_kv=True,
-    #logits_all=False,
-    #use_mmap=True,
-    #use_gpu=True,
-    #n_gpu_layers=-1, # to ensure all layers are on GPU
-    #offload_kqv=True # for better memory management
+    n_gpu_layers=-1, # ensure all layers are on GPU
 )
 
 # Placeholder responses for when context is empty
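For context, a minimal sketch of how the updated constructor might be exercised with llama-cpp-python. The repo_id and filename passed to hf_hub_download are hypothetical placeholders (the real arguments are truncated in the hunk header above), and the create_chat_completion call is only an illustrative usage example, not part of this commit.

# Minimal sketch, assuming llama-cpp-python and huggingface_hub are installed.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Placeholder repo/filename; the actual values are not shown in this diff.
model_path = hf_hub_download(
    repo_id="your-org/your-gguf-repo",
    filename="model.Q4_K_M.gguf",
)

llm = Llama(
    model_path=model_path,
    n_ctx=2048,            # context window added in this commit
    chat_format="llama-3",
    n_gpu_layers=-1,       # ensure all layers are on GPU
)

# create_chat_completion uses an OpenAI-style message schema.
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=128,
)
print(response["choices"][0]["message"]["content"])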