"""Voice-driven restaurant chatbot: Whisper ASR in, gTTS audio out, Gradio UI."""

from transformers import pipeline
from gtts import gTTS
import gradio as gr

# Speech-to-text pipeline (downloads/loads the Whisper model at import time).
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")

# Static restaurant menu, keyed by category.
menu = {
    "Starters": ["Soup", "Spring Rolls"],
    "Main Course": ["Paneer Butter Masala", "Chicken Curry", "Veg Biryani"],
    "Breads": ["Roti", "Naan", "Paratha"],
    "Desserts": ["Gulab Jamun", "Ice Cream"],
    "Drinks": ["Mango Lassi", "Soda", "Water"],
}


def text_to_speech(text):
    """Synthesize *text* to an English MP3 and return the file path.

    NOTE: always writes to the same 'response.mp3' in the working directory,
    so concurrent requests would overwrite each other's audio.
    """
    tts = gTTS(text, lang="en")
    audio_file = "response.mp3"
    tts.save(audio_file)
    return audio_file


def chatbot_conversation(audio_file):
    """Transcribe the user's audio and reply.

    Args:
        audio_file: Filesystem path to the recorded audio (or None when the
            user submitted without recording — Gradio passes None then).

    Returns:
        (response_text, response_audio_path); the audio path is None when no
        input was given or transcription failed, so the UI degrades gracefully
        instead of crashing.
    """
    # Guard: Gradio hands us None if the user clicks submit with no audio.
    if audio_file is None:
        return "Please record or upload some audio first.", None

    # Speech-to-text using Whisper.
    try:
        transcription = pipe(audio_file)["text"]
    except Exception as e:  # UI boundary: surface the failure instead of raising
        return f"Error: {e}", None

    # Normalize once for the keyword checks below.
    text = transcription.lower()

    # Keyword-based intent handling.
    if "menu" in text:
        response = "Our menu categories are: " + ", ".join(menu.keys())
    elif "order" in text:
        response = "What would you like to order? We have " + ", ".join(menu["Main Course"])
    elif "thank you" in text:
        response = "You're welcome! Enjoy your meal!"
    else:
        response = "I'm sorry, I didn't understand that. Could you please repeat?"

    # Convert the text reply to audio for the voice channel.
    audio_response = text_to_speech(response)
    return response, audio_response


# Gradio interface: microphone/file input -> (reply text, reply audio).
iface = gr.Interface(
    fn=chatbot_conversation,
    inputs=gr.Audio(type="filepath"),
    # First output carries the bot's reply (not the raw transcription),
    # so it is labeled "Response" rather than "Transcription".
    outputs=[gr.Textbox(label="Response"), gr.Audio(label="Response Audio")],
    title="Restaurant Chatbot with Whisper ASR",
    description="Speak to the chatbot and get a response!",
)

if __name__ == "__main__":
    iface.launch()