import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Step 1: Whisper model for speech-to-text transcription.
model = whisper.load_model("base")


def transcribe_audio(audio_file):
    """Transcribe the audio file at *audio_file* with Whisper; return the text."""
    result = model.transcribe(audio_file)
    return result["text"]


# Step 2: Groq client for the LLM (e.g. Llama 3).
# Read the key from the environment instead of hard-coding a secret; the
# original placeholder remains as a fallback so existing setups still run.
api_key = os.environ.get("GROQ_API_KEY", "your_groq_api_key")
client = Groq(api_key=api_key)


def get_groq_response(text):
    """Send *text* as a user message to the Groq LLM and return its reply."""
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": text}],
        model="llama3-8b-8192",  # Use any other model if you prefer
        stream=False,
    )
    return chat_completion.choices[0].message.content


def text_to_speech(text):
    """Convert *text* to speech with gTTS and return the path of the MP3 file.

    The temp file is created with an ``.mp3`` suffix (gTTS emits MP3 data, and
    the extension helps the Gradio player) and its OS handle is closed before
    gTTS writes to the path — a still-open NamedTemporaryFile cannot be
    reopened for writing on Windows. The caller/Gradio is responsible for the
    file afterwards; it is intentionally not auto-deleted.
    """
    tts = gTTS(text)
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # release the handle so gTTS can open the path itself
    tts.save(path)
    return path


def voice_chatbot(audio_input):
    """Full pipeline: recorded audio -> transcript -> LLM reply -> spoken audio.

    *audio_input* is a filepath (Gradio ``type="filepath"``); the return value
    is a filepath to an MP3 that Gradio plays back.
    """
    transcription = transcribe_audio(audio_input)
    response = get_groq_response(transcription)
    return text_to_speech(response)


# Gradio interface: audio (microphone/file) in, audio out.
iface = gr.Interface(
    fn=voice_chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(),
    live=True,
)

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    iface.launch()