# Voice-to-voice chatbot: transcribe uploaded audio with Whisper, generate a
# reply with a Groq-hosted LLM, and speak the reply back with gTTS, all behind
# a Gradio web UI.
#
# Setup (run once):
#   pip uninstall -y whisper
#   pip install git+https://github.com/openai/whisper.git
#   pip install gradio gtts groq ffmpeg-python

import os

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# SECURITY: never hard-code API keys in source. The original file embedded a
# live Groq key, which must be treated as leaked and revoked. Read the key
# from the environment instead and fail fast with a clear message if missing.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before running.")

# Create the Groq client and load the Whisper model once at startup so every
# request reuses them (model loading is expensive).
client = Groq(api_key=_groq_api_key)
model = whisper.load_model("base", device="cpu")


def process_audio(file_path):
    """Transcribe an audio file, get a chatbot reply, and synthesize it to speech.

    Parameters
    ----------
    file_path : str | None
        Path to the uploaded audio file. Gradio passes ``None`` when the
        user submits without uploading anything.

    Returns
    -------
    tuple[str, str | None]
        ``(response_text_or_error_message, path_to_response_mp3_or_None)``.
        The second element is ``None`` on any failure so the Gradio audio
        output stays empty.
    """
    try:
        # Guard against an empty submission: Gradio sends None when no file
        # was provided, and os.path.isfile(None) would raise TypeError.
        if not file_path or not os.path.isfile(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")
        print(f"Processing file: {file_path}")

        # Decode the audio (requires ffmpeg on PATH) and transcribe it.
        audio = whisper.load_audio(file_path)
        print("Audio loaded successfully.")
        result = model.transcribe(audio)
        text = result["text"]
        print("Transcription:", text)

        # Ask the Groq-hosted LLM for a reply to the transcribed text.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",
        )
        response_message = chat_completion.choices[0].message.content.strip()
        print("Chatbot response:", response_message)

        # Synthesize the reply to speech. gTTS.save() writes the MP3 directly,
        # so no intermediate BytesIO buffer is needed.
        response_audio_path = "response.mp3"
        tts = gTTS(response_message)
        tts.save(response_audio_path)

        return response_message, response_audio_path

    except FileNotFoundError as e:
        return f"File not found: {e}", None
    except UnicodeDecodeError as e:
        return f"Invalid audio file encoding: {e}", None
    except Exception as e:
        # Last-resort handler so the UI shows the error instead of crashing.
        return f"An unexpected error occurred: {e}", None


# --- Gradio UI definition ---------------------------------------------------
title = "Voice-to-Voice Chatbot Application"
description = "Run a voice-to-voice chatbot with transcription and audio response."
article = (
    "### Instructions\n"
    "1. Upload an audio file.\n"
    "2. Wait for transcription and chatbot's response.\n"
    "3. Listen to the response audio."
)

iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
    # NOTE(review): live=True re-runs the whole pipeline on every input
    # change, which can fire repeated paid API calls; consider dropping it
    # in favor of an explicit Submit button.
    live=True,
    title=title,
    description=description,
    article=article,
)

if __name__ == "__main__":
    # share=True exposes a public tunnel URL (intended for notebook use);
    # the __main__ guard keeps imports of this module from launching the UI.
    iface.launch(share=True)