devkushal75 committed on
Commit 02819d3 · verified
1 Parent(s): 882e54f

Update app.py

Files changed (1)
  1. app.py +24 -31
app.py CHANGED
@@ -6,35 +6,31 @@ import tempfile
 import os
 from huggingface_hub import hf_hub_download
 
-
 # ----- Initialization -----
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf"  # the model is in gguf format
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf"  # the model is in GGUF format
 
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
     filename=model_basename
 )
 
-# Initialize the LLAMA model. Update the model_path to point to your model file.
+# Initialize the LLAMA model.
 llm = Llama(
     model_path=model_path,
-    n_threads=2,      # CPU cores
-    n_batch=512,      # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-    n_gpu_layers=43,  # Change this value based on your model and your GPU VRAM pool.
-    n_ctx=4096,       # Context window
+    n_threads=2,  # CPU cores
+    n_batch=512,
+    n_gpu_layers=43,
+    n_ctx=4096,
 )
 
-
 # Load the Whisper model for speech-to-text transcription.
 whisper_model = whisper.load_model("base")
 
 # ----- Helper Functions -----
 
 def transcribe_audio(audio_file):
-    """
-    Transcribes the provided audio file using Whisper.
-    """
+    """Transcribes the provided audio file using Whisper."""
     if audio_file is None:
         return ""
     result = whisper_model.transcribe(audio_file)
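Note on the hard-coded loader settings above: n_threads=2 and n_gpu_layers=43 assume a specific host (a 2-core CPU and enough VRAM for 43 offloaded layers). A minimal sketch of deriving the thread count at startup instead; the values and helper logic here are illustrative, not part of this commit:

import os
from llama_cpp import Llama

# Illustrative: size the thread pool to the host instead of hard-coding it.
n_threads = max(1, (os.cpu_count() or 2) - 1)

llm = Llama(
    model_path=model_path,  # the GGUF path returned by hf_hub_download above
    n_threads=n_threads,
    n_batch=512,
    n_gpu_layers=0,  # assumption: CPU-only host; raise when GPU VRAM is available
    n_ctx=4096,
)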
@@ -43,16 +39,14 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
+    Note: Removed echo=True to avoid repeating the prompt.
     """
-    # Call the LLAMA model. The output is a dict with a "choices" list.
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=True)
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature)  # echo removed
     response = output["choices"][0]["text"]
     return response.strip()
 
 def text_to_speech(text):
-    """
-    Converts text to speech using gTTS and returns the filepath to the saved audio.
-    """
+    """Converts text to speech using gTTS and returns the filepath to the saved audio."""
     tts = gTTS(text=text, lang="en")
     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(tmp_file.name)
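Context for the echo change in this hunk: llama-cpp-python returns an OpenAI-style completion dict, and with echo=True the prompt text is prepended to choices[0]["text"], so the old code would read the prompt back into the reply. A sketch of the shape the code indexes into (field values are illustrative):

output = llm("User: Hi\nAssistant: ", max_tokens=16, temperature=0.7)
# Roughly: {"id": "...", "object": "text_completion",
#           "choices": [{"text": " Hello! How can I help?", "index": 0,
#                        "logprobs": None, "finish_reason": "stop"}], ...}
print(output["choices"][0]["text"].strip())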
@@ -62,10 +56,14 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     """
     Handles a single turn of the conversation:
     - If an audio file is provided and no text message, transcribe it.
-    - Builds a conversation prompt from the chat history.
+    - Builds a prompt using only the current user input.
     - Generates a response from LLAMA.
-    - Converts the response to speech.
-    - Returns updated chat history, the response text, the response audio filepath, and updated state.
+    - Converts the assistant's response to speech.
+    Returns:
+    - A new history containing only the current turn.
+    - The assistant's response text.
+    - The assistant's response audio filepath.
+    - Updated state (new history).
     """
     # Use the transcribed audio if text is empty.
     if audio is not None and (text is None or text.strip() == ""):
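The four return values listed in the new docstring line up with four Gradio outputs: the Chatbot display, the response text, the response audio, and the State holding the history. The interface block sits outside this diff, so the component names in this sketch are assumed:

# Hypothetical wiring; the real component names live in the
# `with gr.Blocks() as demo:` section below this diff.
submit_btn.click(
    fn=voice_chat,
    inputs=[audio_in, text_in, state, max_tokens_slider, temperature_slider],
    outputs=[chatbot, response_box, audio_out, state],
)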
@@ -73,23 +71,18 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     else:
         user_input = text if text else ""
 
-    # Build the conversation prompt (history is a list of tuples: (user, assistant))
-    prompt = ""
-    if history:
-        for (user_turn, bot_turn) in history:
-            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
-    prompt += f"User: {user_input}\nAssistant: "
+    # Build prompt using only the current user input.
+    prompt = f"User: {user_input}\nAssistant: "
 
     # Generate response using LLAMA-CPP.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
-    # Convert the response to speech audio.
+
+    # Convert only the assistant's response to speech.
     audio_response = text_to_speech(response_text)
 
-    # Append this turn to the conversation history.
-    new_history = history.copy() if history else []
-    new_history.append((user_input, response_text))
+    # Reset conversation history to only include the current turn.
+    new_history = [(user_input, response_text)]
 
-    # Return four outputs: update the Chatbot display, show the assistant text, play audio, and update state.
     return new_history, response_text, audio_response, new_history
 
 # ----- Gradio Interface -----
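Net effect of this hunk: the model no longer sees earlier turns, since the prompt is rebuilt from the current input alone and the returned history is reset on every call. An illustrative call with hypothetical inputs:

history, reply, audio_path, state = voice_chat(
    audio=None,
    text="What is GGUF?",
    history=[("earlier question", "earlier answer")],  # discarded by the new code
    max_tokens=150,
    temperature=0.7,
)
assert history == [("What is GGUF?", reply)]  # only the current turn survives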
@@ -125,4 +118,4 @@ with gr.Blocks() as demo:
     )
 
 # Launch the app.
-demo.launch()
+demo.launch()