# Voicebot / app.py
# NOTE: the lines below are Hugging Face file-page metadata that was captured
# with the source ("dschandra's picture / Update app.py / 92f2b60 verified /
# raw / history blame / 1.86 kB"); kept here as a comment so the file is
# valid, runnable Python.
from transformers import pipeline
from gtts import gTTS
import gradio as gr
import os
# Initialize Whisper pipeline for speech-to-text.
# NOTE(review): "openai/whisper-large-v3-turbo" is pulled from the Hugging Face
# Hub on first use — a large download; confirm the deployment target has the
# disk/memory for it. `pipe` is module-level state used by chatbot_conversation.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")
# Static restaurant menu: category name -> list of dish names.
# Read by chatbot_conversation when answering "menu" and "order" queries
# (only the keys and the "Main Course" entries are spoken back).
menu = {
"Starters": ["Soup", "Spring Rolls"],
"Main Course": ["Paneer Butter Masala", "Chicken Curry", "Veg Biryani"],
"Breads": ["Roti", "Naan", "Paratha"],
"Desserts": ["Gulab Jamun", "Ice Cream"],
"Drinks": ["Mango Lassi", "Soda", "Water"]
}
# Function to convert text to speech
def text_to_speech(text, filename="response.mp3"):
    """Synthesize *text* as spoken English and save it as an MP3.

    Args:
        text: Text to speak (gTTS rejects empty input).
        filename: Output path for the MP3. Defaults to "response.mp3" for
            backward compatibility; note the default file is overwritten on
            every call, so concurrent requests clobber each other's audio.

    Returns:
        The path of the saved MP3 file (suitable for gr.Audio output).
    """
    tts = gTTS(text, lang="en")
    tts.save(filename)
    return filename
# Chatbot logic
def chatbot_conversation(audio_file):
    """Transcribe the user's speech and reply with text plus synthesized audio.

    Args:
        audio_file: Filesystem path to the recording (gr.Audio type="filepath");
            may be None if the user submits without recording.

    Returns:
        A ``(response_text, audio_path)`` tuple. ``audio_path`` is None when
        there is no audio to transcribe or transcription fails.
    """
    # Guard: Gradio passes None when nothing was recorded.
    if not audio_file:
        return "Please record some audio first.", None
    # Speech-to-text using Whisper
    try:
        transcription = pipe(audio_file)["text"]
    except Exception as e:
        return f"Error: {e}", None
    # Lower-case once instead of per branch; keyword match is case-insensitive.
    text = transcription.lower()
    # Generate a response based on transcription
    if "menu" in text:
        response = "Our menu categories are: " + ", ".join(menu.keys())
    elif "order" in text:
        response = "What would you like to order? We have " + ", ".join(menu["Main Course"])
    elif "thank you" in text:
        response = "You're welcome! Enjoy your meal!"
    else:
        response = "I'm sorry, I didn't understand that. Could you please repeat?"
    # Convert response to audio
    audio_response = text_to_speech(response)
    return response, audio_response
# Gradio Interface: microphone/file input -> (text reply, spoken reply).
# type="filepath" hands chatbot_conversation a path on disk, which is what the
# Whisper pipeline expects.
iface = gr.Interface(
    fn=chatbot_conversation,
    inputs=gr.Audio(type="filepath"),
    # Fix: the first output is the bot's reply text, not the raw transcription,
    # so label it accordingly.
    outputs=[gr.Textbox(label="Response"), gr.Audio(label="Response Audio")],
    title="Restaurant Chatbot with Whisper ASR",
    description="Speak to the chatbot and get a response!",
)

if __name__ == "__main__":
    iface.launch()