devkushal75 committed on
Commit 9b4fabe · verified · 1 Parent(s): 49f548c

Update app.py

Files changed (1)
app.py +10 -6
app.py CHANGED
@@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
 
 # ----- Initialization ------
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in GGUF format
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
 
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
@@ -39,9 +39,8 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
-    Note: Removed echo=True to avoid repeating the prompt.
     """
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"]) # echo removed
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
     response = output["choices"][0]["text"]
     return response.strip()
 
@@ -56,7 +55,7 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     """
     Handles a single turn of the conversation:
     - If an audio file is provided and no text message, transcribe it.
-    - Builds a prompt using only the current user input.
+    - Builds a prompt using only the current user input with additional instructions.
     - Generates a response from LLAMA.
     - Converts the assistant's response to speech.
     Returns:
@@ -71,8 +70,13 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     else:
         user_input = text if text else ""
 
-    # Build prompt using only the current user input.
-    prompt = f"User: {user_input}\nAssistant: "
+    # Additional system instructions for improved behavior.
+    system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
+                     "Provide accurate, factual, and polite responses. "
+                     "Answer the user's question directly without unnecessary commentary.")
+
+    # Build prompt using the system instructions plus the current user input.
+    prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
 
     # Generate response using LLAMA-CPP.
     response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
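
For reference, a minimal standalone sketch of the generation flow after this commit, assuming the huggingface_hub and llama-cpp-python packages; the n_ctx value and the example user input are illustrative and not taken from app.py:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized GGUF weights from the Hub (same repo and file as app.py).
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-13B-chat-GGUF",
    filename="llama-2-13b-chat.Q5_K_M.gguf",
)

# n_ctx is an assumed value for illustration; app.py may configure the model differently.
llm = Llama(model_path=model_path, n_ctx=2048)

# The new prompt layout: system instructions first, then the single user turn.
system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
                 "Provide accurate, factual, and polite responses. "
                 "Answer the user's question directly without unnecessary commentary.")
user_input = "What is the capital of France?"  # hypothetical input for the example
prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "

# stop=["User:"] truncates generation if the model starts writing the next
# user turn; echo defaults to False, so the prompt is not repeated in the output.
output = llm(prompt, max_tokens=150, temperature=0.7, stop=["User:"])
print(output["choices"][0]["text"].strip())

Since the prompt is rebuilt from only the current user input, each turn remains stateless; the added system_prompt costs a fixed number of extra prompt tokens per call but carries no conversation history.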