Tijmen2 committed on
Commit
3a2a3f4
·
verified ·
1 Parent(s): b2bc559

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -15,6 +15,7 @@ llm = Llama(
15
  n_ctx=2048,
16
  chat_format="llama-3",
17
  n_gpu_layers=-1, # ensure all layers are on GPU
 
18
  )
19
 
20
  # Placeholder responses for when context is empty
@@ -62,7 +63,6 @@ def bot(history):
62
  temperature=0.7,
63
  top_p=0.95,
64
  stream=True,
65
- num_threads=1,
66
  )
67
 
68
  for chunk in response:
 
15
  n_ctx=2048,
16
  chat_format="llama-3",
17
  n_gpu_layers=-1, # ensure all layers are on GPU
18
+ flash_attn=True,
19
  )
20
 
21
  # Placeholder responses for when context is empty
 
63
  temperature=0.7,
64
  top_p=0.95,
65
  stream=True,
 
66
  )
67
 
68
  for chunk in response: