"""Voice chatbot: Whisper speech-to-text -> LLAMA-CPP chat -> gTTS speech."""

import os
import tempfile

import gradio as gr
import whisper
from gtts import gTTS
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ----- Initialization -----

# Download the quantized LLAMA-2 13B chat model from the Hugging Face Hub
# (cached locally by huggingface_hub after the first run).
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "llama-2-13b-chat.Q5_K_M.gguf"
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Initialize the LLAMA model.
llm = Llama(
    model_path=model_path,
    n_threads=2,  # CPU cores
    n_batch=512,
    n_gpu_layers=43,
    n_ctx=4096,
)

# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")

# ----- Helper Functions -----


def transcribe_audio(audio_file):
    """Transcribe the audio file at *audio_file* with Whisper.

    Returns the transcribed text, or "" when no file was provided.
    """
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
    return result["text"]


def generate_response(prompt, max_tokens=150, temperature=0.7):
    """Generate a completion for *prompt* with LLAMA-CPP.

    Generation stops at the "User:" marker so the model does not start
    writing the user's next turn. Returns the stripped response text.
    """
    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, stop=["User:"])
    response = output["choices"][0]["text"]
    return response.strip()


def text_to_speech(text):
    """Convert *text* to speech with gTTS and return the saved MP3's filepath.

    Uses mkstemp and closes the descriptor before writing: the previous
    NamedTemporaryFile(delete=False) approach leaked an open handle and
    fails on Windows, where gTTS cannot reopen a file that is still open.
    """
    tts = gTTS(text=text, lang="en")
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # release our handle; gTTS reopens the path to write
    tts.save(path)
    return path


def voice_chat(audio, text, history, max_tokens, temperature):
    """Handle a single turn of the conversation.

    - If an audio file is provided and no text message, transcribe it.
    - Builds a prompt using only the current user input with additional
      instructions (history is deliberately NOT fed back into the prompt).
    - Generates a response from LLAMA.
    - Converts the assistant's response to speech.

    Returns:
        - A new history containing only the current turn.
        - The assistant's response text.
        - The assistant's response audio filepath (None if nothing to say).
        - Updated state (new history).
    """
    # Use the transcribed audio if text is empty.
    if audio is not None and (text is None or text.strip() == ""):
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""

    # Additional system instructions for improved behavior.
    system_prompt = ("You are a helpful, knowledgeable, and concise assistant. "
                     "Provide accurate, factual, and polite responses. "
                     "Answer the user's question directly without unnecessary commentary.")

    # Build prompt using the system instructions plus the current user input.
    prompt = f"{system_prompt}\n\nUser: {user_input}\nAssistant: "

    # Generate response using LLAMA-CPP.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)

    # Convert only the assistant's response to speech. gTTS raises on empty
    # input, so skip synthesis when the model produced nothing.
    audio_response = text_to_speech(response_text) if response_text else None

    # Reset conversation history to only include the current turn.
    new_history = [(user_input, response_text)]
    return new_history, response_text, audio_response, new_history


# ----- Gradio Interface -----

with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA‑CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: Audio input and/or text input.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Display outputs: Chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    # Gradio State to hold the conversation history.
    state = gr.State([])

    # On clicking the "Send" button, run voice_chat directly (the previous
    # run_voice_chat wrapper only forwarded its arguments unchanged).
    send_btn.click(
        fn=voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state],
    )

# Launch the app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()