devkushal75 committed
Commit 4d5fc75 · verified · 1 Parent(s): fab8000

Update app.py

Files changed (1):
  1. app.py +26 -21
app.py CHANGED
@@ -6,35 +6,31 @@ import tempfile
 import os
 from huggingface_hub import hf_hub_download
 
-
 # ----- Initialization -----
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in gguf format
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in GGUF format
 
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
     filename=model_basename
 )
 
-# Initialize the LLAMA model. Update the model_path to point to your model file.
+# Initialize the LLAMA model.
 llm = Llama(
     model_path=model_path,
-    n_threads=2, # CPU cores
-    n_batch=512, # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-    n_gpu_layers=43, # Change this value based on your model and your GPU VRAM pool.
-    n_ctx=4096, # Context window
+    n_threads=2,
+    n_batch=512,
+    n_gpu_layers=43,
+    n_ctx=4096,
 )
 
-
 # Load the Whisper model for speech-to-text transcription.
 whisper_model = whisper.load_model("base")
 
 # ----- Helper Functions -----
 
 def transcribe_audio(audio_file):
-    """
-    Transcribes the provided audio file using Whisper.
-    """
+    """Transcribes the provided audio file using Whisper."""
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
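
Note on this hunk: the commit drops the inline comments from the Llama(...) call without changing any values. As a reference, here is a minimal sketch with the removed annotations folded back in, describing what each llama-cpp-python loader option controls (values mirror the diff, not a recommendation):

from llama_cpp import Llama

# Sketch only: same values as the diff, annotated per the comments the
# commit removed (llama-cpp-python loader options).
llm = Llama(
    model_path=model_path,   # GGUF file downloaded via hf_hub_download above
    n_threads=2,             # CPU cores used for generation
    n_batch=512,             # should be between 1 and n_ctx; constrained by GPU VRAM
    n_gpu_layers=43,         # layers offloaded to the GPU; depends on model and VRAM pool
    n_ctx=4096,              # context window in tokens
)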
@@ -43,43 +39,52 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
+    Note: the echo argument was removed so the prompt is not repeated.
     """
-    # Call the LLAMA model. The output is a dict with a "choices" list.
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=False)
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature) # echo removed
     response = output["choices"][0]["text"]
     return response.strip()
 
 def text_to_speech(text):
-    """
-    Converts text to speech using gTTS and returns the filepath to the saved audio.
-    """
+    """Converts text to speech using gTTS and returns the filepath to the saved audio."""
     tts = gTTS(text=text, lang="en")
     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(tmp_file.name)
     return tmp_file.name
 
 def voice_chat(audio, text, history, max_tokens, temperature):
+    """
+    Handles a single turn of the conversation:
+    - If an audio file is provided and no text message, transcribe it.
+    - Builds a prompt with only the current user input.
+    - Generates a response from LLAMA.
+    - Converts the assistant's response to speech.
+    Returns:
+    - A new history containing only the current turn.
+    - The assistant's response text.
+    - The assistant's response audio filepath.
+    - The updated state (new history).
+    """
     # Use the transcribed audio if text is empty.
     if audio is not None and (text is None or text.strip() == ""):
         user_input = transcribe_audio(audio)
     else:
         user_input = text if text else ""
 
-    # Build the prompt using only the current message (ignoring previous history)
+    # Build prompt without prior history.
     prompt = f"User: {user_input}\nAssistant: "
-
+
     # Generate response using LLAMA-CPP.
     response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
     # Convert only the assistant's response to speech.
     audio_response = text_to_speech(response_text)
 
-    # Instead of accumulating history, just return the current exchange.
+    # Create new history with only the current exchange.
     new_history = [(user_input, response_text)]
 
-    # Return outputs: updated chatbot display, assistant text, audio file, and updated state.
+    # Return the outputs.
     return new_history, response_text, audio_response, new_history
 
-
 # ----- Gradio Interface -----
 
 with gr.Blocks() as demo:
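
On the echo change: in llama-cpp-python, echo defaults to False for Llama.__call__, so dropping the argument (the old call passed echo=False) leaves behavior unchanged; the prompt is excluded from the returned text either way. A sketch of the completion dict that generate_response indexes into, assuming llama-cpp-python's OpenAI-style output:

# llm(...) returns an OpenAI-style completion dict; generate_response()
# reads choices[0]["text"]. Field values here are illustrative.
output = llm("User: hi\nAssistant: ", max_tokens=16, temperature=0.7)
# output roughly:
# {
#   "id": "cmpl-...", "object": "text_completion", "model": "...",
#   "choices": [{"text": " Hello!", "index": 0, "finish_reason": "stop"}],
#   "usage": {"prompt_tokens": 8, "completion_tokens": 3, "total_tokens": 11},
# }
print(output["choices"][0]["text"].strip())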
 
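Because voice_chat builds the prompt from the current message alone and returns a fresh one-item history, every exchange is single-turn by design. A quick usage sketch of that contract (input values are illustrative):

# The history passed in is ignored; the returned chat history and state
# are the same one-item list holding only the current exchange.
history, reply, mp3_path, state = voice_chat(
    audio=None,
    text="Hello there",
    history=[("earlier", "turn")],
    max_tokens=64,
    temperature=0.7,
)
assert history == [("Hello there", reply)] and state is history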
 
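The diff ends at the opening of the Gradio block. The wiring below is a hypothetical sketch (component names and layout are assumptions, not part of this commit) of how voice_chat's four outputs could map onto Blocks components:

# Hypothetical wiring, not from the commit. voice_chat returns
# (chat history, response text, audio filepath, state), in that order.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    audio_in = gr.Audio(sources=["microphone"], type="filepath")  # Gradio 4.x argument names
    text_in = gr.Textbox(label="Or type a message")
    reply_box = gr.Textbox(label="Assistant reply")
    reply_audio = gr.Audio(type="filepath", label="Spoken reply")
    state = gr.State([])
    max_tok = gr.Slider(16, 512, value=150, step=1, label="max_tokens")
    temp = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="temperature")
    gr.Button("Send").click(
        voice_chat,
        inputs=[audio_in, text_in, state, max_tok, temp],
        outputs=[chatbot, reply_box, reply_audio, state],
    )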