Tijmen2 committed on
Commit
18cc8ee
·
verified ·
1 Parent(s): 0513c2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -9
app.py CHANGED
@@ -12,16 +12,9 @@ model_path = hf_hub_download(
12
 
13
  llm = Llama(
14
  model_path=model_path,
15
- #n_ctx=2048,
16
- #n_threads=8,
17
  chat_format="llama-3",
18
- #seed=42,
19
- #f16_kv=True,
20
- #logits_all=False,
21
- #use_mmap=True,
22
- #use_gpu=True,
23
- #n_gpu_layers=-1, # to ensure all layers are on GPU
24
- #offload_kqv=True # for better memory management
25
  )
26
 
27
  # Placeholder responses for when context is empty
 
12
 
13
  llm = Llama(
14
  model_path=model_path,
15
+ n_ctx=2048,
 
16
  chat_format="llama-3",
17
+ n_gpu_layers=-1, # ensure all layers are on GPU
 
 
 
 
 
 
18
  )
19
 
20
  # Placeholder responses for when context is empty