import gradio as gr
from llama_cpp import Llama
import whisper
from gtts import gTTS
import tempfile
import os
from huggingface_hub import hf_hub_download

# ----- Initialization -----
# Download the GGUF weights from the Hugging Face Hub (cached locally after
# the first run).
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q2_K.gguf",
)

# Initialize the llama-cpp model.
llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores
    n_batch=512,      # Should be between 1 and n_ctx; size to the VRAM in your GPU.
    n_gpu_layers=43,  # Adjust based on your model and GPU VRAM pool; use 0 for CPU-only.
    n_ctx=4096,       # Context window
)
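
# Optional sanity check (a sketch; uncomment while debugging): a one-off,
# deterministic completion confirms the weights loaded and inference works.
#
#   _probe = llm("Q: What is 2 + 2? A:", max_tokens=8, temperature=0.0)
#   print(_probe["choices"][0]["text"])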

# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")


# ----- Helper Functions -----
def transcribe_audio(audio_file):
    """
    Transcribes the provided audio file using Whisper.
    """
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
    return result["text"]
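
# Usage sketch ("sample.wav" is a hypothetical local recording). Whisper's
# transcribe() takes a filepath and returns a dict; besides "text" it also
# carries "segments" and the detected "language".
#
#   print(transcribe_audio("sample.wav"))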


def generate_response(prompt, max_tokens=150, temperature=0.7):
    """
    Uses llama-cpp-python to generate a response for the given prompt.
    """
    # Call the model; the output is a dict with a "choices" list. echo=False
    # returns only the completion (echo=True would prepend the prompt itself),
    # and stopping on "User:" keeps the model from writing the user's next turn.
    output = llm(prompt, max_tokens=max_tokens, temperature=temperature,
                 echo=False, stop=["User:"])
    response = output["choices"][0]["text"]
    return response.strip()
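
# Sketch of the prompt shape the chat loop below feeds in; the "User:" stop
# token limits the completion to a single assistant turn:
#
#   prompt = "User: Hello, who are you?\nAssistant: "
#   print(generate_response(prompt, max_tokens=50, temperature=0.7))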


def text_to_speech(text):
    """
    Converts text to speech using gTTS and returns the filepath to the saved audio.
    """
    tts = gTTS(text=text, lang="en")
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tmp_file.close()  # Close the handle so gTTS can write to the path (needed on Windows).
    tts.save(tmp_file.name)
    return tmp_file.name
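
# Note: delete=False means each call leaves an .mp3 on disk. A minimal
# cleanup sketch, once the caller is done with the file:
#
#   path = text_to_speech("Hello there!")
#   # ... play or serve the file ...
#   os.remove(path)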


def voice_chat(audio, text, history, max_tokens, temperature):
    """
    Handles a single turn of the conversation:
    - If an audio file is provided and no text message, transcribes it.
    - Builds a conversation prompt from the chat history.
    - Generates a response from the model.
    - Converts the response to speech.
    Returns the updated chat history, the response text, the response audio
    filepath, and the updated state.
    """
    # Use the transcribed audio if the text box is empty.
    if audio is not None and (text is None or text.strip() == ""):
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""
    # Build the conversation prompt (history is a list of (user, assistant) tuples).
    prompt = ""
    if history:
        for (user_turn, bot_turn) in history:
            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
    prompt += f"User: {user_input}\nAssistant: "
    # Generate the response.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
    # Convert the response to speech audio.
    audio_response = text_to_speech(response_text)
    # Append this turn to the conversation history.
    new_history = history.copy() if history else []
    new_history.append((user_input, response_text))
    # Four outputs: update the Chatbot display, show the assistant text,
    # play the audio, and update the state.
    return new_history, response_text, audio_response, new_history
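
# Single-turn sketch of the whole pipeline outside the UI (all argument
# values hypothetical):
#
#   history, reply, audio_path, history = voice_chat(
#       audio=None, text="Tell me a joke", history=[],
#       max_tokens=150, temperature=0.7,
#   )
#   print(reply, audio_path)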


# ----- Gradio Interface -----
with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA-CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: audio and/or text.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Outputs: chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")
    # Gradio State to hold the conversation history across turns.
    state = gr.State([])
    # On clicking "Send", run a single conversation turn (voice_chat is passed
    # directly; the pass-through wrapper was redundant).
    send_btn.click(
        fn=voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state],
    )

# Launch the app.
demo.launch()
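
# On a Hugging Face Space the app is served automatically. Running locally,
# Gradio's built-in tunnel can expose a temporary public URL if needed:
#
#   demo.launch(share=True)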