"""Voice-to-voice chatbot: Whisper transcription -> Groq LLM reply -> gTTS speech.

The module exposes ``chatbot`` (the Gradio callback) and ``interface`` (the
Gradio app). Importing it loads the Whisper model and requires the
``GROQ_API_KEY`` environment variable to be set.
"""

import logging
import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

logger = logging.getLogger(__name__)

# Model identifier sent to the Groq chat-completions endpoint.
GROQ_MODEL = "llama3-8b-8192"

# Load the Groq API key from the environment variable
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")

# Initialize Whisper and Groq
whisper_model = whisper.load_model("base")
client = Groq(api_key=api_key)


def _transcribe(audio_path: str) -> str:
    """Return the Whisper transcription of the audio file at *audio_path*.

    Raises:
        ValueError: if Whisper returns nothing or only whitespace.
    """
    audio = whisper.load_audio(audio_path)
    result = whisper_model.transcribe(audio)
    text = result.get("text") if result is not None else None
    # A whitespace-only transcription carries no prompt; treat it as a failure
    # instead of forwarding an effectively empty message to the LLM.
    if not text or not text.strip():
        raise ValueError("Whisper transcription failed or returned empty text.")
    return text


def _generate_reply(prompt: str) -> Optional[str]:
    """Send *prompt* as a single user message to Groq and return the reply text.

    Raises:
        ValueError: if the API response has no choices.
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model=GROQ_MODEL,
    )
    if not chat_completion or not chat_completion.choices:
        raise ValueError("Invalid response from Groq API")
    return chat_completion.choices[0].message.content


def _synthesize_speech(text: Optional[str]) -> str:
    """Convert *text* to speech with gTTS and return the path of the MP3 file.

    Each call writes to its own temporary file, so concurrent requests cannot
    overwrite one another's audio (a single shared "response.mp3" would).
    The file is intentionally not deleted here: the caller hands the path to
    Gradio, which needs it to exist after this function returns.

    Raises:
        ValueError: if *text* is ``None``, empty, or only whitespace.
    """
    if not text or not text.strip():
        raise ValueError("Response text is empty or invalid")

    tts = gTTS(text)
    fd, path = tempfile.mkstemp(prefix="response_", suffix=".mp3")
    # gTTS opens the path itself; close our descriptor first so the write also
    # works on platforms that forbid two open handles on the same file.
    os.close(fd)
    try:
        tts.save(path)
    except Exception:
        # Do not leave an empty/partial file behind when synthesis fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path


def chatbot(audio_input: Optional[str]) -> Tuple[Optional[str], Optional[str], str]:
    """Gradio callback: answer a spoken question with spoken and written text.

    Args:
        audio_input: Path of the recorded/uploaded audio file, or ``None``
            when no audio was provided.

    Returns:
        A 3-tuple ``(audio_path, transcription, response_text)``.
        On failure ``audio_path`` is ``None``, ``transcription`` is the text
        obtained so far (``None`` if transcription did not complete), and the
        third element is ``"Error: <message>"``.
    """
    # Bound before the try so the error path can report it without
    # introspecting locals().
    transcription = None
    try:
        logger.debug("Audio input type: %s", type(audio_input))
        if audio_input is None:
            raise ValueError("Audio input is None. Please provide a valid audio file.")
        if not os.path.exists(audio_input):
            raise FileNotFoundError(f"Audio file {audio_input} not found.")

        # Step 1: Transcribe audio input using Whisper
        transcription = _transcribe(audio_input)

        # Step 2: Generate response using the LLaMA 8B model via the Groq API
        response_text = _generate_reply(transcription)

        # Step 3: Convert text response to speech using gTTS
        audio_path = _synthesize_speech(response_text)

        # Step 4: Return the response audio and text transcription
        return audio_path, transcription, response_text
    except Exception as e:
        # UI boundary: show the error in the interface instead of crashing the
        # app, but keep the traceback in the logs for diagnosis.
        logger.exception("chatbot request failed")
        return None, transcription, f"Error: {str(e)}"


# Define the Gradio interface
interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Audio(type="filepath"), "text", "text"],
    title="Voice-to-Voice Chatbot",
    description="Speak to the chatbot and get a real-time response.",
    live=True,  # Automatically processes input without requiring a button click
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()