import gradio as gr
from llama_cpp import Llama
import whisper
from gtts import gTTS
import tempfile
from huggingface_hub import hf_hub_download

# ----- Initialization -----

# Download the quantized GGUF weights from the Hugging Face Hub.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q2_K.gguf",
)

# Initialize the LLAMA model. Update model_path to point to your model file.
llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU cores
    n_batch=512,      # Should be between 1 and n_ctx; consider your GPU's VRAM.
    n_gpu_layers=43,  # Change this value based on your model and your GPU VRAM pool.
    n_ctx=4096,       # Context window
)

# Load the Whisper model for speech-to-text transcription.
whisper_model = whisper.load_model("base")

# ----- Helper Functions -----

def transcribe_audio(audio_file):
    """Transcribes the provided audio file using Whisper."""
    if audio_file is None:
        return ""
    result = whisper_model.transcribe(audio_file)
    return result["text"]

def generate_response(prompt, max_tokens=150, temperature=0.7):
    """Uses llama-cpp to generate a response for the given prompt."""
    # Call the LLAMA model. The output is a dict with a "choices" list.
    # echo=False keeps the prompt out of the returned text, and stopping on
    # "User:" prevents the model from writing the next user turn itself.
    output = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        echo=False,
        stop=["User:"],
    )
    response = output["choices"][0]["text"]
    return response.strip()

def text_to_speech(text):
    """Converts text to speech using gTTS and returns the filepath to the saved audio."""
    tts = gTTS(text=text, lang="en")
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tmp_file.close()  # Release the handle so gTTS can write to the path (required on Windows).
    tts.save(tmp_file.name)
    return tmp_file.name

def voice_chat(audio, text, history, max_tokens, temperature):
    """
    Handles a single turn of the conversation:
      - If an audio file is provided and no text message, transcribes it.
      - Builds a conversation prompt from the chat history.
      - Generates a response from LLAMA.
      - Converts the response to speech.
    Returns the updated chat history, the response text, the response audio
    filepath, and the updated state.
    """
    # Use the transcribed audio if the text box is empty.
    if audio is not None and (text is None or text.strip() == ""):
        user_input = transcribe_audio(audio)
    else:
        user_input = text if text else ""

    # Build the conversation prompt (history is a list of (user, assistant) tuples).
    prompt = ""
    if history:
        for user_turn, bot_turn in history:
            prompt += f"User: {user_turn}\nAssistant: {bot_turn}\n"
    prompt += f"User: {user_input}\nAssistant: "

    # Generate the response using llama-cpp.
    response_text = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)

    # Convert the response to speech audio.
    audio_response = text_to_speech(response_text)

    # Append this turn to the conversation history.
    new_history = history.copy() if history else []
    new_history.append((user_input, response_text))

    # Return four outputs: update the Chatbot display, show the assistant text,
    # play the audio, and update the state.
    return new_history, response_text, audio_response, new_history
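# For reference, the prompt that voice_chat (above) assembles is a plain
# completion transcript. With a hypothetical two-turn history it looks like
# this (the conversation is invented purely for illustration):
#
#   User: What is llama.cpp?
#   Assistant: A C/C++ inference engine for LLaMA-family models.
#   User: Does it run on a CPU?
#   Assistant:
#
# The base GGUF model is a plain text-completion model rather than a
# chat-tuned one, so this "User:/Assistant:" scaffold together with the
# stop=["User:"] sequence in generate_response is what keeps turns separated.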
# ----- Gradio Interface -----

with gr.Blocks() as demo:
    gr.Markdown("# Voice Chatbot with LLAMA‑CPP")
    with gr.Row():
        with gr.Column(scale=5):
            # User inputs: audio input and/or text input.
            audio_input = gr.Audio(type="filepath", label="Speak to Chatbot")
            text_input = gr.Textbox(placeholder="Or type your message", label="Your Message")
            send_btn = gr.Button("Send")
            max_tokens_slider = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
            temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            # Display outputs: chat history, assistant text response, and audio playback.
            chat_history = gr.Chatbot(label="Chat History")
            response_textbox = gr.Textbox(label="Assistant Response")
            audio_output = gr.Audio(label="Response Audio", type="filepath")

    # Gradio State to hold the conversation history.
    state = gr.State([])

    # On clicking the "Send" button, run voice_chat with the current inputs.
    send_btn.click(
        fn=voice_chat,
        inputs=[audio_input, text_input, state, max_tokens_slider, temperature_slider],
        outputs=[chat_history, response_textbox, audio_output, state],
    )

# Launch the app.
demo.launch()
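# ----- Notes -----
# A minimal sketch of the dependencies this script assumes (common PyPI
# package names; pin versions as needed):
#
#   pip install gradio llama-cpp-python openai-whisper gTTS huggingface_hub
#
# n_gpu_layers only takes effect if llama-cpp-python was built with GPU
# support (e.g. a CUDA/cuBLAS or Metal build); on a CPU-only wheel, set
# n_gpu_layers=0. Whisper additionally needs ffmpeg installed on the system
# to decode the recorded audio. Recent Gradio releases deprecate the
# (user, assistant) tuple format that gr.Chatbot consumes here in favor of
# type="messages".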