"""Voice-driven restaurant ordering assistant built on Gradio.

Audio from the browser is transcribed with a Whisper ASR pipeline, matched
against a small keyword-based dialogue, and answered both as text and as
synthesized speech (gTTS).

NOTE: ``conversation_history`` and ``context`` are module-level, so every
caller of ``process_order`` in this process shares one conversation.
"""

import logging
import os
import re
from typing import List, Optional, Tuple

import gradio as gr
import numpy as np
from gtts import gTTS
from transformers import pipeline

logger = logging.getLogger(__name__)

# Initialize the speech recognition pipeline
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# Conversation history and context
conversation_history = []
context = {"last_action": None, "order": []}

# Menu for the restaurant
menu = {
    "Starters": ["Soup", "Spring Rolls"],
    "Main Course": ["Paneer Butter Masala", "Chicken Curry", "Veg Biryani"],
    "Breads": ["Roti", "Naan", "Paratha"],
    "Desserts": ["Gulab Jamun", "Ice Cream"],
    "Drinks": ["Mango Lassi", "Soda", "Water"],
}

# Whole-word "no": a plain substring test would also fire on "know", "not",
# "another", ... and finalize the order by accident.
_DECLINE_PATTERN = re.compile(r"\bno\b")


# Text-to-Speech Function
def speak_and_save(text, filename="response.mp3"):
    """Synthesize *text* as English speech and save it to *filename*.

    Args:
        text: The sentence(s) to speak.
        filename: Destination path of the MP3 file; an existing file at that
            path is overwritten.

    Returns:
        The path the audio was written to (``filename``).
    """
    tts = gTTS(text=text, lang="en")
    tts.save(filename)
    return filename


def _match_menu_items(user_input: str) -> List[str]:
    """Return the menu items mentioned in *user_input*, in menu order.

    Matching is a case-insensitive substring test so that simple plurals
    ("two naans") are still recognized. *user_input* must already be
    lower-cased.
    """
    return [
        item
        for items in menu.values()
        for item in items
        if item.lower() in user_input
    ]


def _build_response(user_input: str) -> str:
    """Advance the dialogue state for one utterance and return the reply.

    Reads and updates the module-level ``context``. *user_input* must already
    be lower-cased.
    """
    # The very first utterance of the session is always answered with the
    # greeting, whatever was said.
    if context["last_action"] is None:
        context["last_action"] = "greet"
        return "Welcome to our restaurant! How can I assist you today?"

    if "menu" in user_input:
        context["last_action"] = "show_menu"
        menu_lines = "".join(
            f"{category}: {', '.join(items)}\n" for category, items in menu.items()
        )
        return f"Here is our menu:\n{menu_lines}What would you like to order?"

    requested_items = _match_menu_items(user_input)
    if requested_items or "order" in user_input:
        context["last_action"] = "place_order"
        if not requested_items:
            # The customer said "order" but named nothing we serve; do not
            # claim that something was added.
            return (
                "I didn't catch which item you would like. "
                "Which dish can I add to your order?"
            )
        context["order"].extend(requested_items)
        return (
            f"I have added {', '.join(context['order'])} to your order. "
            "Would you like anything else?"
        )

    if _DECLINE_PATTERN.search(user_input) or "that's it" in user_input:
        context["last_action"] = "final_order"
        if not context["order"]:
            # Nothing to finalize; avoid reading out an empty order.
            return (
                "You have not ordered anything yet. "
                "Let me know whenever you would like to order."
            )
        final_items = ", ".join(context["order"])
        context["order"] = []  # Reset the order
        return (
            f"Your final order is: {final_items}. Thank you for your order. "
            "Your food will arrive shortly."
        )

    return "I'm not sure what you meant. Could you clarify?"


# Process the audio file and generate response
def process_order(audio_file_path) -> Tuple[str, Optional[str]]:
    """Transcribe one recording and answer it as text and speech.

    Args:
        audio_file_path: Path of the recorded audio file.

    Returns:
        A ``(text_response, audio_response_path)`` tuple. The audio path is
        ``None`` when speech recognition or speech synthesis failed; in the
        recognition case the text is an error message.

    Raises:
        ValueError: If *audio_file_path* is ``None``.
    """
    if audio_file_path is None:
        raise ValueError("Audio file path is None. Please provide a valid path.")

    # Recognize speech
    try:
        transcript = asr_pipeline(audio_file_path)["text"]
    except Exception as e:
        return f"Error in speech recognition: {e}", None

    # Process the recognized text
    user_input = transcript.strip().lower()
    conversation_history.append(f"Customer: {user_input}")

    response = _build_response(user_input)
    conversation_history.append(f"AI: {response}")

    # A speech-synthesis failure must not lose the text answer, so degrade to
    # a text-only reply, mirroring how recognition errors are reported.
    try:
        audio_response_path = speak_and_save(response)
    except Exception:
        logger.exception("Text-to-speech failed; returning a text-only response")
        audio_response_path = None

    return response, audio_response_path


# Save Conversation History
def save_conversation():
    """Write the conversation so far to ``conversation_history.txt``.

    The file is overwritten. It is written as UTF-8 so transcripts containing
    non-ASCII characters do not fail under a narrower platform default.

    Returns:
        A confirmation message.
    """
    with open("conversation_history.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(conversation_history))
    return "Conversation history saved successfully!"


# Gradio Interface
def create_interface():
    """Build the Gradio interface that wires audio input to ``process_order``."""
    return gr.Interface(
        fn=process_order,
        inputs=gr.Audio(type="filepath", label="Your Voice Input"),
        outputs=[
            gr.Textbox(label="Text Response"),
            gr.Audio(label="Audio Response"),
        ],
        title="Restaurant Voice Assistant",
        description="Talk to our voice assistant to place your order or ask about the menu!",
        live=True,
    )


if __name__ == "__main__":
    try:
        app = create_interface()
        app.launch()
    finally:
        # Persist the transcript even if the server exits with an error.
        save_conversation()