# voicekkk / app.py
# Hugging Face Space by prasanth345 — commit c800729 (verified), ~2 kB.
import gradio as gr
import pyttsx3
import speech_recognition as sr
from transformers import Conversation, pipeline
# Initialize Text-to-Speech engine
# Module-level pyttsx3 engine shared by all requests; pyttsx3 engines are
# not thread-safe, so concurrent Gradio callbacks may interleave speech.
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # words per minute (pyttsx3 default is 200)
# Load a conversational AI model
# Downloads/loads DialoGPT-medium once at startup; first run may be slow.
chatbot = pipeline("conversational", model="microsoft/DialoGPT-medium")
def tts_response(user_query):
    """Generate an AI reply to *user_query*, speak it aloud, and return the text.

    Args:
        user_query: The user's message as plain text.

    Returns:
        The model's reply string, or an ``"Error: ..."`` message if generation
        or speech synthesis fails.
    """
    try:
        # The "conversational" pipeline expects a Conversation object and
        # returns one; the original response[0]['generated_text'] indexing
        # does not match that return type and raised at runtime.
        conversation = chatbot(Conversation(user_query))
        bot_reply = conversation.generated_responses[-1]
        # Speak the reply through the shared pyttsx3 engine (blocks until done).
        engine.say(bot_reply)
        engine.runAndWait()
        return bot_reply
    except Exception as e:
        # Surface the failure as text so the Gradio UI displays it
        # instead of crashing the request.
        return f"Error: {str(e)}"
def stt_to_tts(audio):
    """Transcribe *audio*, answer it with the AI model, and speak the reply.

    Args:
        audio: Filesystem path to the recorded audio file. Gradio passes
            ``None`` when the user submits without recording anything.

    Returns:
        The AI's reply text, or a human-readable error message.
    """
    # Guard: sr.AudioFile(None) raises an unhandled exception, which Gradio
    # would show as an opaque error; fail gracefully instead.
    if audio is None:
        return "No audio received. Please record something first."
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
        # Uses Google's free web API; requires network access.
        user_query = recognizer.recognize_google(audio_data)
        return tts_response(user_query)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        return f"Request error from Speech Recognition service; {e}"
# Gradio interface
def gradio_ui():
    """Assemble and return the two-tab Gradio Blocks UI for the voice agent."""
    with gr.Blocks() as demo:
        gr.Markdown("## AI Voice Agent")

        # Tab 1: type a query, read (and hear) the reply.
        with gr.Tab("Text-to-Speech"):
            query_box = gr.Textbox(label="Your Query")
            reply_box = gr.Textbox(label="AI Response", interactive=False)
            ask_btn = gr.Button("Submit")
            ask_btn.click(tts_response, inputs=query_box, outputs=reply_box)

        # Tab 2: speak a query, read (and hear) the reply.
        with gr.Tab("Speech-to-Speech"):
            # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4.x
            # renamed it to `sources=["microphone"]` — confirm pinned version.
            mic_input = gr.Audio(source="microphone", type="filepath", label="Speak Now")
            voice_reply = gr.Textbox(label="AI Response")
            voice_btn = gr.Button("Submit")
            voice_btn.click(stt_to_tts, inputs=mic_input, outputs=voice_reply)

    return demo
if __name__ == "__main__":
    # Build the UI and start the local Gradio server.
    interface = gradio_ui()
    interface.launch()