devkushal75 committed (verified)
Commit 882e54f · Parent(s): 4d5fc75

Update app.py

Files changed (1): app.py (+31 -24)
app.py CHANGED
@@ -6,31 +6,35 @@ import tempfile
 import os
 from huggingface_hub import hf_hub_download
 
+
 # ----- Initialization -----
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
-model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in GGUF format
+model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in gguf format
 
 model_path = hf_hub_download(
     repo_id=model_name_or_path,
     filename=model_basename
 )
 
-# Initialize the LLAMA model.
+# Initialize the LLAMA model. Update the model_path to point to your model file.
 llm = Llama(
     model_path=model_path,
-    n_threads=2,
-    n_batch=512,
-    n_gpu_layers=43,
-    n_ctx=4096,
+    n_threads=2, # CPU cores
+    n_batch=512, # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
+    n_gpu_layers=43, # Change this value based on your model and your GPU VRAM pool.
+    n_ctx=4096, # Context window
 )
 
+
 # Load the Whisper model for speech-to-text transcription.
 whisper_model = whisper.load_model("base")
 
 # ----- Helper Functions -----
 
 def transcribe_audio(audio_file):
-    """Transcribes the provided audio file using Whisper."""
+    """
+    Transcribes the provided audio file using Whisper.
+    """
     if audio_file is None:
         return ""
     result = whisper_model.transcribe(audio_file)
@@ -39,14 +43,16 @@ def transcribe_audio(audio_file):
 def generate_response(prompt, max_tokens=150, temperature=0.7):
     """
     Uses LLAMA-CPP to generate a response for the given prompt.
-    Note: Removed echo=True to prevent repeating the prompt.
     """
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature) # echo removed
+    # Call the LLAMA model. The output is a dict with a "choices" list.
+    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=True)
     response = output["choices"][0]["text"]
     return response.strip()
 
 def text_to_speech(text):
-    """Converts text to speech using gTTS and returns the filepath to the saved audio."""
+    """
+    Converts text to speech using gTTS and returns the filepath to the saved audio.
+    """
     tts = gTTS(text=text, lang="en")
     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
     tts.save(tmp_file.name)
@@ -56,14 +62,10 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     """
     Handles a single turn of the conversation:
     - If an audio file is provided and no text message, transcribe it.
-    - Builds a prompt with only the current user input.
+    - Builds a conversation prompt from the chat history.
     - Generates a response from LLAMA.
-    - Converts the assistant's response to speech.
-    Returns:
-    - A new history containing only the current turn.
-    - The assistant's response text.
-    - The assistant's response audio filepath.
-    - The updated state (new history).
+    - Converts the response to speech.
+    Returns updated chat history, the response text, the response audio filepath, and updated state.
     """
     # Use the transcribed audio if text is empty.
     if audio is not None and (text is None or text.strip() == ""):
@@ -71,18 +73,23 @@ def voice_chat(audio, text, history, max_tokens, temperature):
     else:
         user_input = text if text else ""
 
-    # Build prompt without prior history.
-    prompt = f"User: {user_input}\nAssistant: "
+    # Build the conversation prompt (history is a list of tuples: (user, assistant))
+    prompt = ""
+    if history:
+        for (user_turn, bot_turn) in history:
+            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
+    prompt += f"User: {user_input}\nAssistant: "
 
     # Generate response using LLAMA-CPP.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
-    # Convert only the assistant's response to speech.
+    # Convert the response to speech audio.
     audio_response = text_to_speech(response_text)
 
-    # Create new history with only the current exchange.
-    new_history = [(user_input, response_text)]
+    # Append this turn to the conversation history.
+    new_history = history.copy() if history else []
+    new_history.append((user_input, response_text))
 
-    # Return the outputs.
+    # Return four outputs: update the Chatbot display, show the assistant text, play audio, and update state.
     return new_history, response_text, audio_response, new_history
 
 # ----- Gradio Interface -----
@@ -118,4 +125,4 @@ with gr.Blocks() as demo:
     )
 
 # Launch the app.
-demo.launch()
+demo.launch()
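For reference, the history-replay prompt introduced in this commit can be exercised on its own. The following is a minimal, self-contained sketch of the same accumulation pattern the new voice_chat uses; the helper name build_prompt and the sample history values are invented for illustration and are not part of the commit.

# Sketch of the prompt construction added in this commit (illustrative only).
def build_prompt(history, user_input):
    """Replay each (user, assistant) tuple, then append the current turn."""
    prompt = ""
    if history:
        for user_turn, bot_turn in history:
            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
    prompt += f"User: {user_input}\nAssistant: "
    return prompt

# Invented two-turn example:
print(build_prompt([("Hello", "Hi there!")], "What model are you?"))
# User: Hello
# Assistant: Hi there!
# User: What model are you?
# Assistant:

Because the whole history is replayed verbatim on every turn, long conversations will eventually approach the n_ctx=4096 context window set above; truncating or summarizing old turns is a common mitigation, though this commit does not do so.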
 
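One caveat the diff itself hints at: the removed docstring line read "Removed echo=True to prevent repeating the prompt", yet the new generate_response passes echo=True again, so the returned text includes the prompt as a prefix. If only the assistant's reply is wanted, a variant like the sketch below strips the echoed prefix; generate_response_no_echo is a hypothetical helper, not part of the commit.

# Hypothetical variant that tolerates echo=True by removing the echoed
# prompt from the completion text (illustrative only, not in the commit).
def generate_response_no_echo(llm, prompt, max_tokens=150, temperature=0.7):
    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, echo=True)
    text = output["choices"][0]["text"]
    # With echo=True the completion begins with the prompt itself; drop it.
    if text.startswith(prompt):
        text = text[len(prompt):]
    return text.strip()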