Tijmen2 committed on
Commit
4775357
·
verified ·
1 Parent(s): c94cc88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -11,6 +11,7 @@ llm = Llama(
11
  model_path=model_path,
12
  n_ctx=2048,
13
  n_threads=4,
 
14
  seed=42,
15
  f16_kv=True,
16
  logits_all=False,
@@ -27,14 +28,15 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
27
  messages.append({"role": "assistant", "content": assistant_msg})
28
  messages.append({"role": "user", "content": message})
29
 
30
- response = llm.generate_chat(
31
- messages,
32
  max_tokens=max_tokens,
33
  temperature=temperature,
34
  top_p=top_p
35
  )
36
 
37
- return response
 
38
 
39
  demo = gr.ChatInterface(
40
  respond,
@@ -46,6 +48,5 @@ demo = gr.ChatInterface(
46
  ]
47
  )
48
 
49
-
50
  if __name__ == "__main__":
51
  demo.launch()
 
11
  model_path=model_path,
12
  n_ctx=2048,
13
  n_threads=4,
14
+ chat_format="llama-2",
15
  seed=42,
16
  f16_kv=True,
17
  logits_all=False,
 
28
  messages.append({"role": "assistant", "content": assistant_msg})
29
  messages.append({"role": "user", "content": message})
30
 
31
+ response = llm.create_chat_completion(
32
+ messages=messages,
33
  max_tokens=max_tokens,
34
  temperature=temperature,
35
  top_p=top_p
36
  )
37
 
38
+ # Extract the assistant's message from the response
39
+ return response["choices"][0]["message"]["content"]
40
 
41
  demo = gr.ChatInterface(
42
  respond,
 
48
  ]
49
  )
50
 
 
51
  if __name__ == "__main__":
52
  demo.launch()