Update app.py
app.py CHANGED
@@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
 
 # ----- Initialization ------
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
 
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
@@ -39,9 +39,8 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
-    Note: Removed echo=True to avoid repeating the prompt.
     """
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
     response = output["choices"][0]["text"]
     return response.strip()
 
@@ -56,7 +55,7 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     """
     Handles a single turn of the conversation:
     - If an audio file is provided and no text message, transcribe it.
-    - Builds a prompt using only the current user input.
+    - Builds a prompt using only the current user input with additional instructions.
     - Generates a response from LLAMA.
     - Converts the assistant's response to speech.
     Returns:
@@ -71,8 +70,13 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     else:
         user_input = text if text else ""
 
-    # Build prompt using only the current user input.
-    prompt = f"User: {user_input}\nAssistant: "
+    # Additional system instructions for improved behavior.
+    system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
+                     "Provide accurate, factual, and polite responses. "
+                     "Answer the user's question directly without unnecessary commentary.")
+
+    # Build prompt using the system instructions plus the current user input.
+    prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
 
     # Generate response using LLAMA-CPP.
     response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
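
Taken together, the hunks leave app.py downloading the GGUF weights from the Hub, generating with llama-cpp, and prepending a system prompt on every turn. Below is a minimal standalone sketch of that flow for context; the Llama() constructor arguments (n_ctx here) and the example question are assumptions, since the diff does not show those parts of app.py.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized chat weights from the Hub (cached after the first call).
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-13B-chat-GGUF",
    filename="llama-2-13b-chat.Q5_K_M.gguf",
)

# n_ctx=2048 is an assumed context size; the Space's actual Llama() setup is not in the diff.
llm = Llama(model_path=model_path, n_ctx=2048)

def generate_response(prompt, max_tokens=150, temperature=0.7):
    # stop=["User:"] cuts generation off before the model invents the next user turn.
    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
    return output["choices"][0]["text"].strip()

system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
                 "Provide accurate, factual, and polite responses. "
                 "Answer the user's question directly without unnecessary commentary.")

# One turn: system instructions plus only the current user input, as in voice_chat().
user_input = "What is the capital of France?"  # hypothetical input for illustration
prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
print(generate_response(prompt))

Because voice_chat() builds the prompt from only the current user input, the system text has to ride along on every call; the conversation history is kept for the UI but never fed back to the model, which keeps each prompt well inside the context window at the cost of multi-turn memory.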