# app.py — Hugging Face Space "TEST_HL" by devkushal75 (revision 9b4fabe, verified).
# Voice chatbot: Whisper (speech-to-text) + LLaMA-2 via llama-cpp (LLM) + gTTS (text-to-speech).
import gradio as gr
from llama_cpp import Llama
import whisper
from gtts import gTTS
import tempfile
import os
from huggingface_hub import hf_hub_download
# ----- Initialization ------
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
model_path = hf_hub_download(
repo_id=model_name_or_path,
filename=model_basename
)
# Initialize the LLAMA model.
llm = Llama(
model_path=model_path,
n_threads=2, # CPU cores
n_batch=512,
n_gpu_layers=43,
n_ctx=4096,
)
# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")
# ----- Helper Functions -----
def transcribe_audio(audio_file):
"""Transcribes the provided audio file using Whisper."""
if audio_file is None:
return ""
result = whisper_model.transcribe(audio_file)
return result["text"]
def generate_response(prompt, max_tokens=150, temperature=0.7):
"""
Uses LLAMA-CPP to generate a response for the given prompt.
"""
output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
response = output["choices"][0]["text"]
return response.strip()
def text_to_speech(text):
"""Converts text to speech using gTTS and returns the filepath to the saved audio."""
tts = gTTS(text=text, lang="en")
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
tts.save(tmp_file.name)
return tmp_file.name
def voice_chat(audio, text, history, max_tokens, temperature):
"""
Handles a single turn of the conversation:
- If an audio file is provided and no text message, transcribe it.
- Builds a prompt using only the current user input with additional instructions.
- Generates a response from LLAMA.
- Converts the assistant's response to speech.
Returns:
- A new history containing only the current turn.
- The assistant's response text.
- The assistant's response audio filepath.
- Updated state (new history).
"""
# Use the transcribed audio if text is empty.
if audio is not None and (text is None or text.strip() == ""):
user_input = transcribe_audio(audio)
else:
user_input = text if text else ""
# Additional system instructions for improved behavior.
system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
"Provide accurate, factual, and polite responses. "
"Answer the user's question directly without unnecessary commentary.")
# Build prompt using the system instructions plus the current user input.
prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "
# Generate response using LLAMA-CPP.
response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
# Convert only the assistant's response to speech.
audio_response = text_to_speech(response_text)
# Reset conversation history to only include the current turn.
new_history = [(user_input, response_text)]
return new_history, response_text, audio_response, new_history
# ----- Gradio Interface -----
with gr.Blocks() as demo:
gr.Markdown("# Voice Chatbot with LLAMA‑CPP")
with gr.Row():
with gr.Column(scale=5):
# User inputs: Audio input and/or text input.
audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
send_btn = gr.Button("Send")
max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
with gr.Column(scale=7):
# Display outputs: Chat history, assistant text response, and audio playback.
chat_history = gr.Chatbot(label="Chat History")
response_textbox = gr.Textbox(label="Assistant Response")
audio_output = gr.Audio(label="Response Audio", type="filepath")
# Gradio State to hold the conversation history.
state = gr.State([])
def run_voice_chat(audio, text, history, max_tokens, temperature):
return voice_chat(audio, text, history, max_tokens, temperature)
# On clicking the "Send" button, run the voice_chat function.
send_btn.click(
fn=run_voice_chat,
inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
outputs=[chat_history, response_textbox, audio_output, state]
)
# Launch the app.
demo.launch()