import logging
import os
import re

import gradio as gr
import numpy as np
from gtts import gTTS
from transformers import pipeline

# Initialize the speech recognition pipeline.
# This is a module-level statement, so the pipeline is created when the module
# is imported. process_order calls it with an audio file path and reads the
# "text" entry of the result.
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# Shared, module-level conversation state.
# conversation_history: transcript lines ("Customer: ..." / "AI: ...") in order.
# context: the last action taken by the assistant and the items ordered so far.
conversation_history = []
context = {
    "last_action": None,
    "order": [],
}

# Restaurant menu: category name -> list of item names, in display order.
menu = {
    "Starters": [
        "Soup",
        "Spring Rolls",
    ],
    "Main Course": [
        "Paneer Butter Masala",
        "Chicken Curry",
        "Veg Biryani",
    ],
    "Breads": [
        "Roti",
        "Naan",
        "Paratha",
    ],
    "Desserts": [
        "Gulab Jamun",
        "Ice Cream",
    ],
    "Drinks": [
        "Mango Lassi",
        "Soda",
        "Water",
    ],
}

# Text-to-Speech Function
def speak_and_save(text, filename="response.mp3"):
    """Synthesize *text* with gTTS (language "en") and save it to *filename*.

    Returns *filename* unchanged so the caller can hand the path on.
    """
    # NOTE(review): every call with the default filename targets the same
    # path, so a later response replaces an earlier one.
    gTTS(text=text, lang='en').save(filename)
    return filename

# Phrases that mean the customer has finished ordering. Matched on word
# boundaries so that words merely containing "no" ("know", "another") do not
# finalize the order by accident.
_DONE_PATTERN = re.compile(r"\b(?:no|nope|none|nothing)\b|that['’]s it")


def _find_menu_items(text):
    """Return the menu items whose lower-cased name occurs in *text*, in menu order."""
    return [
        item
        for items in menu.values()
        for item in items
        if item.lower() in text
    ]


def _build_response(user_input):
    """Choose the reply for lower-cased *user_input* and update ``context``."""
    if context["last_action"] is None:
        # NOTE(review): the very first utterance is always answered with the
        # greeting, whatever the customer said. Kept as in the original flow.
        context["last_action"] = "greet"
        return "Welcome to our restaurant! How can I assist you today?"

    if "menu" in user_input:
        context["last_action"] = "show_menu"
        menu_lines = [
            f"{category}: {', '.join(items)}" for category, items in menu.items()
        ]
        return (
            "Here is our menu:\n"
            + "\n".join(menu_lines)
            + "\nWhat would you like to order?"
        )

    ordered_now = _find_menu_items(user_input)
    if ordered_now:
        context["order"].extend(ordered_now)
        context["last_action"] = "place_order"
        # Report only what this utterance added; mention the running order
        # separately when it already held earlier items.
        response = f"I have added {', '.join(ordered_now)} to your order."
        if len(context["order"]) > len(ordered_now):
            response += f" Your order so far is: {', '.join(context['order'])}."
        return response + " Would you like anything else?"

    if _DONE_PATTERN.search(user_input):
        context["last_action"] = "final_order"
        if not context["order"]:
            # Nothing to confirm: avoid announcing an empty final order.
            return (
                "You have not ordered anything yet. "
                "Let me know if you would like to hear the menu."
            )
        final_items = ", ".join(context["order"])
        context["order"] = []  # Reset the order
        return (
            f"Your final order is: {final_items}. Thank you for your order. "
            "Your food will arrive shortly."
        )

    if "order" in user_input:
        # The customer wants to order but named no item found in the menu.
        context["last_action"] = "place_order"
        return "What would you like to order? You can ask for the menu to hear the options."

    return "I'm not sure what you meant. Could you clarify?"


# Process the audio file and generate response
def process_order(audio_file_path):
    """Transcribe a customer's audio and produce the assistant's reply.

    The transcript and the reply are appended to ``conversation_history``,
    and ``context`` is updated with the last action and the running order.

    Args:
        audio_file_path: Path of the recorded audio to transcribe.

    Returns:
        A ``(text, audio_path)`` tuple. ``audio_path`` is the file written by
        ``speak_and_save``, or ``None`` when speech recognition or speech
        synthesis failed. On a recognition failure ``text`` is the error
        message.

    Raises:
        ValueError: If *audio_file_path* is ``None``.
    """
    if audio_file_path is None:
        raise ValueError("Audio file path is None. Please provide a valid path.")

    # Recognize speech
    try:
        transcript = asr_pipeline(audio_file_path)["text"]
    except Exception as e:
        return f"Error in speech recognition: {e}", None

    # Strip surrounding whitespace so the logged line and the keyword checks
    # see only the spoken text.
    user_input = transcript.strip().lower()
    conversation_history.append(f"Customer: {user_input}")

    response = _build_response(user_input)
    conversation_history.append(f"AI: {response}")

    # Speech synthesis is best-effort: if it fails, still return the text
    # reply instead of losing the whole turn.
    try:
        audio_response_path = speak_and_save(response)
    except Exception:
        logging.getLogger(__name__).exception(
            "Text-to-speech failed; returning the text response only"
        )
        audio_response_path = None
    return response, audio_response_path

# Save Conversation History
def save_conversation(path="conversation_history.txt"):
    """Write ``conversation_history`` to *path*, one entry per line.

    Any existing file at *path* is overwritten.

    Args:
        path: Destination file; defaults to ``conversation_history.txt`` in
            the current working directory.

    Returns:
        A confirmation message string.
    """
    # Explicit UTF-8: transcripts may contain non-ASCII characters that the
    # platform's default encoding cannot always represent.
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(conversation_history))
    return "Conversation history saved successfully!"

# Gradio Interface
def create_interface():
    """Build the Gradio interface that wires voice input to ``process_order``.

    The interface takes one audio input (passed to the handler as a file
    path) and shows the handler's two results: a text reply and an audio
    reply.
    """
    voice_input = gr.Audio(type="filepath", label="Your Voice Input")
    text_reply = gr.Textbox(label="Text Response")
    audio_reply = gr.Audio(label="Audio Response")

    return gr.Interface(
        fn=process_order,
        inputs=voice_input,
        outputs=[text_reply, audio_reply],
        title="Restaurant Voice Assistant",
        description="Talk to our voice assistant to place your order or ask about the menu!",
        live=True,
    )

if __name__ == "__main__":
    # The finally clause saves the conversation whether launch() returns
    # normally or raises.
    try:
        create_interface().launch()
    finally:
        save_conversation()