# Voice chatbot demo (Hugging Face Space). The original "Spaces: Running"
# status lines were web-page chrome captured during extraction, not code.
import os
import tempfile

import gradio as gr
import whisper
from gtts import gTTS
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ----- Initialization ------
# Download the quantized Llama-2 13B chat weights (GGUF) from the Hub;
# hf_hub_download caches locally and returns the on-disk path.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Initialize the LLAMA model.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,       # context window size
    n_batch=512,      # prompt-eval batch size
    n_threads=2,      # CPU cores
    n_gpu_layers=43,  # layers offloaded to GPU
)

# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")
# ----- Helper Functions ----- | |
def transcribe_audio(audio_file):
    """Run Whisper speech-to-text on *audio_file* and return the transcript.

    Returns an empty string when no audio was supplied.
    """
    if audio_file is None:
        return ""
    transcription = whisper_model.transcribe(audio_file)
    return transcription["text"]
def generate_response(prompt, max_tokens=150, temperature=0.7):
    """Generate a completion for *prompt* with the llama-cpp model.

    Generation halts at the literal "User:" marker so the model does not
    invent the next user turn. The completion text is returned stripped.
    """
    completion = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        stop=["User:"],
    )
    return completion["choices"][0]["text"].strip()
def text_to_speech(text):
    """Convert *text* to speech with gTTS and return the saved MP3's filepath.

    The caller is responsible for the temporary file's eventual cleanup.
    """
    tts = gTTS(text=text, lang="en")
    # mkstemp + close instead of NamedTemporaryFile(delete=False): the latter
    # keeps an open handle while gTTS writes to the same path, which leaks the
    # descriptor and fails on Windows (the open handle locks the file).
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(path)
    return path
def voice_chat(audio, text, history, max_tokens, temperature):
    """Run a single turn of the conversation.

    The typed message wins; Whisper transcription of *audio* is used only
    when the textbox is empty. The chat is deliberately stateless: the
    incoming *history* is discarded and a single-turn prompt is built from
    fixed system instructions plus the current user input.

    Returns:
        (new_history, response_text, response_audio_path, new_history) —
        the history appears twice because it feeds both the Chatbot widget
        and the Gradio State output.
    """
    # Prefer the typed message; fall back to transcribing the audio clip.
    typed = (text or "").strip()
    if typed:
        user_input = text
    elif audio is not None:
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""

    # Additional system instructions for improved behavior.
    system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
                     "Provide accurate, factual, and polite responses. "
                     "Answer the user's question directly without unnecessary commentary.")

    # Single-turn prompt: system instructions plus the current user input only.
    prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "

    response_text = generate_response(
        prompt, max_tokens=max_tokens, temperature=temperature
    )

    # Speak only the assistant's reply, never the prompt.
    audio_response = text_to_speech(response_text)

    # Stateless chat: the displayed history holds just this turn.
    new_history = [(user_input, response_text)]
    return new_history, response_text, audio_response, new_history
# ----- Gradio Interface -----
with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA‑CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: speak and/or type, plus generation controls.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Outputs: chat transcript, assistant text, and spoken reply.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    # Gradio State holding the conversation history between turns.
    state = gr.State([])

    # Wire the Send button directly to voice_chat; the former run_voice_chat
    # pass-through wrapper added nothing.
    send_btn.click(
        fn=voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state],
    )

# Launch the app.
demo.launch()