Update app.py
app.py CHANGED
@@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
 
 # ----- Initialization ------
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
 
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
@@ -39,9 +39,8 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
-    Note: Removed echo=True to avoid repeating the prompt.
     """
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
     response = output["choices"][0]["text"]
     return response.strip()
 
@@ -56,7 +55,7 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     """
     Handles a single turn of the conversation:
     - If an audio file is provided and no text message, transcribe it.
-    - Builds a prompt using only the current user input.
+    - Builds a prompt using only the current user input with additional instructions.
     - Generates a response from LLAMA.
     - Converts the assistant's response to speech.
     Returns:
@@ -71,8 +70,13 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     else:
         user_input = text if text else ""
 
-    # Build prompt using only the current user input.
-    prompt = f"User: {user_input}\nAssistant: "
+    # Additional system instructions for improved behavior.
+    system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
+                     "Provide accurate, factual, and polite responses. "
+                     "Answer the user's question directly without unnecessary commentary.")
+
+    # Build prompt using the system instructions plus the current user input.
+    prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
 
     # Generate response using LLAMA-CPP.
     response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
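
Taken together, the hunks leave app.py downloading the GGUF weights from the Hub, generating with llama-cpp, and prepending a system prompt on every turn. Below is a minimal standalone sketch of that flow for context; the Llama() constructor arguments (n_ctx here) and the example question are assumptions, since the diff does not show those parts of app.py.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized chat weights from the Hub (cached after the first call).
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-13B-chat-GGUF",
    filename="llama-2-13b-chat.Q5_K_M.gguf",
)

# n_ctx=2048 is an assumed context size; the Space's actual Llama() setup is not in the diff.
llm = Llama(model_path=model_path, n_ctx=2048)

def generate_response(prompt, max_tokens=150, temperature=0.7):
    # stop=["User:"] cuts generation off before the model invents the next user turn.
    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
    return output["choices"][0]["text"].strip()

system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
                 "Provide accurate, factual, and polite responses. "
                 "Answer the user's question directly without unnecessary commentary.")

# One turn: system instructions plus only the current user input, as in voice_chat().
user_input = "What is the capital of France?"  # hypothetical input for illustration
prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
print(generate_response(prompt))

Because voice_chat() builds the prompt from only the current user input, the system text has to ride along on every call; the conversation history is kept for the UI but never fed back to the model, which keeps each prompt well inside the context window at the cost of multi-turn memory.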