import io
import os

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Initialize the Groq client (requires GROQ_API_KEY in the environment).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load the Whisper speech-to-text model once at startup.
model = whisper.load_model("base")


def process_audio(file_path):
    """Transcribe an uploaded audio file, ask the LLM for a reply, and speak it.

    Args:
        file_path: Path to the uploaded audio file, or None when the user
            submitted without uploading anything (Gradio passes None then).

    Returns:
        A ``(response_text, response_audio_path)`` tuple. On failure the text
        is an error message and the audio path is None.
    """
    # Guard: Gradio hands us None if the user clicks Submit with no upload.
    if not file_path:
        return "Please upload an audio file first.", None
    try:
        # Transcribe the audio using Whisper.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        text = result["text"]

        # Generate a response using Groq.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )
        # Access the response using dot notation.
        response_message = chat_completion.choices[0].message.content.strip()

        # Convert the response text to speech and save it so Gradio can play it.
        # gTTS.save() writes the MP3 directly — no BytesIO round-trip needed.
        response_audio_path = "response.mp3"
        tts = gTTS(response_message)
        tts.save(response_audio_path)

        # Return the response text and the path to the saved audio file.
        return response_message, response_audio_path
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"An error occurred: {e}", None


# Create the Gradio interface with customized UI.
with gr.Blocks() as demo:
    gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool ❤️")
    gr.Markdown(
        "Upload an audio file to interact with the voice-to-voice chatbot. "
        "The chatbot will transcribe the audio, generate a response, and "
        "provide a spoken reply."
    )
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            submit_button = gr.Button("Submit")
        with gr.Column():
            response_text = gr.Textbox(
                label="Response Text",
                placeholder="The AI-generated response will appear here",
                lines=5,
            )
            response_audio = gr.Audio(label="Response Audio", type="filepath")

    # Link the submit button to the transcribe → respond → speak pipeline.
    submit_button.click(
        fn=process_audio,
        inputs=audio_input,
        outputs=[response_text, response_audio],
    )

# Launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()