# TEST_HL / app.py
import gradio as gr
from llama_cpp import Llama
import whisper
from gtts import gTTS
import tempfile
from huggingface_hub import hf_hub_download
# ----- Initialization -----
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q2_K.gguf",
)
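# hf_hub_download caches the weights locally (by default under ~/.cache/huggingface)
# and returns the path to the cached file, so restarts reuse the download.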
# Initialize the LLaMA model with the weights downloaded above.
llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU threads to use
    n_batch=512,      # Prompt batch size; should be between 1 and n_ctx (VRAM permitting)
    n_gpu_layers=43,  # Layers to offload to the GPU; tune to your model and VRAM pool
    n_ctx=4096,       # Context window
)
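# Note: n_gpu_layers only takes effect when llama-cpp-python is compiled with GPU
# support (e.g. CUDA or Metal); on a CPU-only build the setting is silently ignored.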
# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")
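# Whisper ships several sizes ("tiny", "base", "small", "medium", "large");
# "base" trades some accuracy for speed, which suits CPU-only hosts.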
# ----- Helper Functions -----
def transcribe_audio(audio_file):
    """
    Transcribes the provided audio file using Whisper.
    """
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
    return result["text"]
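# Note on transcribe_audio: Whisper accepts a filepath directly (matching Gradio's
# type="filepath" audio input) and auto-detects the language unless one is given.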
def generate_response(prompt, max_tokens=150, temperature=0.7):
    """
    Uses llama-cpp-python to generate a response for the given prompt.
    """
    # Call the model; the output is a dict with a "choices" list. echo=False keeps
    # the prompt out of the returned text, and the stop sequence keeps the model
    # from writing the next "User:" turn itself.
    output = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        echo=False,
        stop=["User:"],
    )
    response = output["choices"][0]["text"]
    return response.strip()
def text_to_speech(text):
    """
    Converts text to speech using gTTS and returns the filepath to the saved audio.
    """
    tts = gTTS(text=text, lang="en")
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tmp_file.close()  # close the open handle so gTTS can write to the path on all platforms
    tts.save(tmp_file.name)
    return tmp_file.name
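# Note: delete=False keeps the file on disk after the function returns so Gradio
# can serve it; the files accumulate in the temp directory until cleaned up.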
def voice_chat(audio, text, history, max_tokens, temperature):
    """
    Handles a single turn of the conversation:
      - If an audio file is provided and there is no text message, transcribe it.
      - Builds a conversation prompt from the chat history.
      - Generates a response from the LLaMA model.
      - Converts the response to speech.
    Returns the updated chat history, the response text, the response audio
    filepath, and the updated state.
    """
    # Use the transcribed audio if the text box is empty.
    if audio is not None and (text is None or text.strip() == ""):
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""
    # Build the conversation prompt (history is a list of (user, assistant) tuples).
    prompt = ""
    if history:
        for user_turn, bot_turn in history:
            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
    prompt += f"User: {user_input}\nAssistant: "
    # Generate the response with llama-cpp-python.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
    # Convert the response to speech audio.
    audio_response = text_to_speech(response_text)
    # Append this turn to the conversation history.
    new_history = history.copy() if history else []
    new_history.append((user_input, response_text))
    # Four outputs: the Chatbot display, the assistant text, the audio player, and the state.
    return new_history, response_text, audio_response, new_history
# ----- Gradio Interface -----
with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA-CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: audio and/or text.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Outputs: chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")
    # Gradio State holds the conversation history across turns.
    state = gr.State([])
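    # gr.State is per browser session, so concurrent users each keep a separate history.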
    # On clicking "Send", run one turn of the conversation.
    send_btn.click(
        fn=voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state],
    )
# Launch the app.
demo.launch()
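# Locally, `python app.py` starts the server and prints the URL to open;
# on Hugging Face Spaces the app is launched automatically.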