"""Voice-driven restaurant chatbot: Whisper ASR in, gTTS audio out, Gradio UI."""

from transformers import pipeline
from gtts import gTTS
import gradio as gr

# Speech-to-text pipeline (downloads/loads the Whisper model at import time).
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")

# Static restaurant menu, keyed by category.
menu = {
    "Starters": ["Soup", "Spring Rolls"],
    "Main Course": ["Paneer Butter Masala", "Chicken Curry", "Veg Biryani"],
    "Breads": ["Roti", "Naan", "Paratha"],
    "Desserts": ["Gulab Jamun", "Ice Cream"],
    "Drinks": ["Mango Lassi", "Soda", "Water"],
}


def text_to_speech(text):
    """Synthesize *text* to an English MP3 and return the file path.

    NOTE: always writes to the same 'response.mp3' in the working directory,
    so concurrent requests would overwrite each other's audio.
    """
    tts = gTTS(text, lang="en")
    audio_file = "response.mp3"
    tts.save(audio_file)
    return audio_file


def chatbot_conversation(audio_file):
    """Transcribe the user's audio and reply.

    Args:
        audio_file: Filesystem path to the recorded audio (or None when the
            user submitted without recording — Gradio passes None then).

    Returns:
        (response_text, response_audio_path); the audio path is None when no
        input was given or transcription failed, so the UI degrades gracefully
        instead of crashing.
    """
    # Guard: Gradio hands us None if the user clicks submit with no audio.
    if audio_file is None:
        return "Please record or upload some audio first.", None

    # Speech-to-text using Whisper.
    try:
        transcription = pipe(audio_file)["text"]
    except Exception as e:  # UI boundary: surface the failure instead of raising
        return f"Error: {e}", None

    # Normalize once for the keyword checks below.
    text = transcription.lower()

    # Keyword-based intent handling.
    if "menu" in text:
        response = "Our menu categories are: " + ", ".join(menu.keys())
    elif "order" in text:
        response = "What would you like to order? We have " + ", ".join(menu["Main Course"])
    elif "thank you" in text:
        response = "You're welcome! Enjoy your meal!"
    else:
        response = "I'm sorry, I didn't understand that. Could you please repeat?"

    # Convert the text reply to audio for the voice channel.
    audio_response = text_to_speech(response)
    return response, audio_response


# Gradio interface: microphone/file input -> (reply text, reply audio).
iface = gr.Interface(
    fn=chatbot_conversation,
    inputs=gr.Audio(type="filepath"),
    # First output carries the bot's reply (not the raw transcription),
    # so it is labeled "Response" rather than "Transcription".
    outputs=[gr.Textbox(label="Response"), gr.Audio(label="Response Audio")],
    title="Restaurant Chatbot with Whisper ASR",
    description="Speak to the chatbot and get a response!",
)

if __name__ == "__main__":
    iface.launch()