Update app.py
app.py CHANGED

@@ -11,6 +11,7 @@ llm = Llama(
     model_path=model_path,
     n_ctx=2048,
     n_threads=4,
+    chat_format="llama-2",
     seed=42,
     f16_kv=True,
     logits_all=False,
@@ -27,14 +28,15 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
         messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})

-    response = llm.
-        messages,
+    response = llm.create_chat_completion(
+        messages=messages,
         max_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p
     )

-
+    # Extract the assistant's message from the response
+    return response["choices"][0]["message"]["content"]

 demo = gr.ChatInterface(
     respond,
@@ -46,6 +48,5 @@ demo = gr.ChatInterface(
     ]
 )

-
 if __name__ == "__main__":
     demo.launch()
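Taken together, the commit switches the handler to llama-cpp-python's chat API: `chat_format="llama-2"` tells the library to serialize the OpenAI-style message list into the `[INST]`-style prompt that Llama 2 chat models expect, and `create_chat_completion` returns a dict whose reply text lives at `choices[0]["message"]["content"]`. A minimal sketch of the resulting app follows; the history loop, the `model_path` value, and the `additional_inputs` widgets are not visible in the diff, so those parts are assumptions based on standard Gradio `ChatInterface` conventions.

from llama_cpp import Llama
import gradio as gr

model_path = "model.gguf"  # assumption: the real path is defined earlier in app.py

llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    chat_format="llama-2",  # added by this commit: use the Llama 2 chat template
    seed=42,
    f16_kv=True,
    logits_all=False,
)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Build an OpenAI-style message list from the chat history.
    # The loop body is an assumption; the diff only shows its final append.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p
    )

    # Extract the assistant's message from the response
    return response["choices"][0]["message"]["content"]


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # assumption: the diff truncates this list; typical controls shown
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()

Returning the extracted string, rather than the raw response dict, is what lets `gr.ChatInterface` display the reply directly.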