"""Voice-to-voice chatbot.

Pipeline: microphone/file audio -> Whisper transcription -> Groq chat
completion -> gTTS speech synthesis, exposed through a Gradio Blocks UI.
"""

import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# SECURITY: the Groq API key must be supplied via the environment and never
# committed to source. A key was previously hard-coded in this file; treat
# that key as compromised and revoke it in the Groq console.
_GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not _GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it in the environment before "
        "starting the app."
    )

# Initialize Groq client
client = Groq(api_key=_GROQ_API_KEY)

# Load Whisper model once at startup so every request reuses it.
whisper_model = whisper.load_model("base")


def process_audio(audio_file: Optional[str]) -> Tuple[str, Optional[str]]:
    """Transcribe an audio file, query the Groq model, and synthesize a reply.

    Args:
        audio_file: Filesystem path to the recorded/uploaded audio, or a
            falsy value when nothing was provided.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of an MP3 file containing the
        spoken reply. On any failure ``text`` is a human-readable error
        message and ``audio_path`` is ``None``. This function does not
        raise; unexpected errors are printed and reported as a generic
        error message.
    """
    try:
        # Step 1: Transcribe audio to text using Whisper
        if not audio_file or not os.path.exists(audio_file):
            return "Error: Invalid audio input.", None

        print("Transcribing audio...")
        transcription_result = whisper_model.transcribe(audio_file)
        # Guard against a missing or None "text" entry.
        transcription = (transcription_result.get("text") or "").strip()
        print(f"Transcription: {transcription}")

        if not transcription:
            return "Error: Failed to transcribe audio.", None

        # Step 2: Interact with Groq API
        print("Interacting with Groq API...")
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": transcription}],
            model="llama3-8b-8192",
        )
        # The message content may be None; normalize it so the dedicated
        # "empty response" message is returned instead of the generic error.
        response_text = (response.choices[0].message.content or "").strip()
        print(f"Groq Response: {response_text}")

        if not response_text:
            return "Error: Groq API returned an empty response.", None

        # Step 3: Convert response text to speech using gTTS
        print("Converting response to audio...")
        tts = gTTS(response_text)

        # Reserve a unique path, then close the handle before gTTS writes to
        # it: this avoids leaking a file descriptor per request and avoids
        # writing to a file that is still open elsewhere.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name

        try:
            tts.save(audio_path)
        except Exception:
            # Best-effort cleanup so a failed synthesis does not leave an
            # orphaned empty file behind; the original error is re-raised.
            try:
                os.remove(audio_path)
            except OSError:
                pass
            raise

        return response_text, audio_path

    except Exception as e:
        # Top-level boundary for the UI callback: report the error rather
        # than crash the Gradio worker.
        print(f"Error: {e}")
        return "Error occurred during processing.", None


# Define Gradio interface
def chatbot(audio_file: Optional[str]) -> Tuple[str, Optional[str]]:
    """Gradio callback: run the pipeline and return (reply text, reply audio path)."""
    response_text, audio_path = process_audio(audio_file)
    return response_text, audio_path


with gr.Blocks() as demo:
    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Speak Now")
        text_output = gr.Textbox(label="Chatbot Response")
        audio_output = gr.Audio(label="Audio Response")
    submit_button = gr.Button("Submit")

    # Event listeners must be registered inside the Blocks context.
    submit_button.click(
        chatbot,
        inputs=audio_input,
        outputs=[text_output, audio_output],
    )

# Launch Gradio app
demo.launch()