# Voice chatbot: Gradio UI + llama-cpp chat model + Whisper STT + gTTS TTS.
import gradio as gr
from llama_cpp import Llama
import whisper
from gtts import gTTS
import tempfile
import os
from huggingface_hub import hf_hub_download
# ----- Initialization ------
# Model weights are fetched from the Hugging Face Hub on first run and cached
# locally by hf_hub_download on subsequent runs.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "llama-2-13b-chat.Q5_K_M.gguf"  # 5-bit K-quant GGUF variant
model_path = hf_hub_download(
repo_id=model_name_or_path,
filename=model_basename
)
# Initialize the LLAMA model.
# NOTE(review): n_gpu_layers=43 offloads layers to GPU — on a CPU-only host
# this is ignored by llama-cpp-python builds without GPU support; confirm the
# deployment target.
llm = Llama(
model_path=model_path,
n_threads=2, # CPU cores
n_batch=512,
n_gpu_layers=43,
n_ctx=4096,
)
# Load the Whisper model for speech-to-text transcription.
# "base" is the small multilingual checkpoint; downloaded on first use.
whisper_model = whisper.load_model("base")
# ----- Helper Functions -----
def transcribe_audio(audio_file):
    """Return the Whisper transcription of *audio_file*, or "" when no file was supplied."""
    if audio_file is None:
        return ""
    # whisper's transcribe() accepts a filesystem path and returns a dict
    # whose "text" entry holds the full transcript.
    return whisper_model.transcribe(audio_file)["text"]
def generate_response(prompt, max_tokens=150, temperature=0.7):
    """Complete *prompt* with the LLAMA model and return the stripped text.

    Generation halts at the "User:" stop sequence so the model does not
    hallucinate the next user turn.
    """
    completion = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        stop=["User:"],
    )
    # llama-cpp-python returns an OpenAI-style dict; take the first choice.
    return completion["choices"][0]["text"].strip()
def text_to_speech(text):
    """Convert *text* to speech with gTTS and return the path of the saved MP3.

    The file is created with delete=False semantics: it persists after this
    function returns (Gradio serves it), so the caller/OS is responsible for
    eventual cleanup.
    """
    tts = gTTS(text=text, lang="en")
    # Bug fix: NamedTemporaryFile(delete=False) kept an open OS handle that
    # was never closed (handle leak) and, on Windows, blocks gTTS from
    # writing to the still-open file. mkstemp + os.close releases the handle
    # before gTTS writes to the path.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(path)
    return path
def voice_chat(audio, text, history, max_tokens, temperature):
    """Run a single conversation turn.

    The typed message wins; when the text box is blank and audio was
    recorded, the audio is transcribed instead. A single-turn prompt
    (system instructions + current user input) is sent to LLAMA, and the
    reply is also rendered to speech.

    Returns:
        - history containing only this turn (the conversation is reset
          each call by design),
        - the assistant's reply text,
        - the filepath of the spoken reply,
        - the same single-turn history again, for the State component.
    """
    # Fall back to transcription only when no usable text was typed.
    typed = (text or "").strip()
    if not typed and audio is not None:
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""

    # System instructions steering the model toward concise, direct answers.
    system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
                     "Provide accurate, factual, and polite responses. "
                     "Answer the user's question directly without unnecessary commentary.")

    prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
    # Only the assistant's reply — not the prompt — is voiced.
    audio_response = text_to_speech(response_text)

    # Deliberately discard prior history: each turn stands alone.
    single_turn = [(user_input, response_text)]
    return single_turn, response_text, audio_response, single_turn
# ----- Gradio Interface -----
# Two-column layout: inputs (mic/text + generation sliders) on the left,
# outputs (chat history, reply text, reply audio) on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA‑CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: Audio input and/or text input.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            # Generation controls forwarded to generate_response().
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Display outputs: Chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")
    # Gradio State to hold the conversation history.
    # NOTE(review): voice_chat resets the history each turn, so this state
    # only ever holds the most recent exchange.
    state = gr.State([])
    # Thin wrapper so the click handler has a module-level callable.
    def run_voice_chat(audio, text, history, max_tokens, temperature):
        return voice_chat(audio, text, history, max_tokens, temperature)
    # On clicking the "Send" button, run the voice_chat function.
    send_btn.click(
        fn=run_voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state]
    )
# Launch the app (blocks until the server stops).
demo.launch()