import gradio as gr
import pyttsx3
import speech_recognition as sr
from transformers import Conversation, pipeline

# Initialize the Text-to-Speech engine once at module load.
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # speaking rate (words per minute)

# Conversational AI model (DialoGPT).
# NOTE(review): the "conversational" pipeline task was removed in
# transformers >= 4.42 — pin an older transformers version, or migrate to a
# chat-templated "text-generation" pipeline when upgrading.
chatbot = pipeline("conversational", model="microsoft/DialoGPT-medium")


def tts_response(user_query):
    """Generate an AI reply to *user_query*, speak it aloud, and return it.

    Returns the reply text, or an "Error: ..." string on any failure so the
    Gradio UI always has something to display.
    """
    try:
        # Bug fix: the conversational pipeline expects a Conversation object,
        # not a raw string, and exposes replies via generated_responses.
        # The original code called chatbot(user_query) and indexed
        # response[0]['generated_text'] (the text-generation result format),
        # which raised at runtime for every query.
        conversation = chatbot(Conversation(user_query))
        bot_reply = conversation.generated_responses[-1]
        # NOTE(review): pyttsx3's runAndWait() can raise "run loop already
        # started" if two Gradio requests overlap — confirm single-user use
        # or serialize access with a lock.
        engine.say(bot_reply)
        engine.runAndWait()
        return bot_reply
    except Exception as e:
        # Broad catch is deliberate: surface any failure as text in the UI
        # instead of crashing the request handler.
        return f"Error: {str(e)}"


def stt_to_tts(audio):
    """Convert recorded speech to text, answer with the AI, and speak the reply.

    *audio* is a filepath to the recorded clip (Gradio ``type="filepath"``).
    Returns the AI reply text, or a human-readable error message.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
        # Uses Google's free web API — requires network access.
        user_query = recognizer.recognize_google(audio_data)
        return tts_response(user_query)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        return f"Request error from Speech Recognition service; {e}"


def gradio_ui():
    """Build and return the Gradio Blocks app with TTS and speech tabs."""
    with gr.Blocks() as app:
        gr.Markdown("## AI Voice Agent")
        with gr.Tab("Text-to-Speech"):
            user_query = gr.Textbox(label="Your Query")
            response = gr.Textbox(label="AI Response", interactive=False)
            submit_button = gr.Button("Submit")
            submit_button.click(tts_response, inputs=user_query, outputs=response)
        with gr.Tab("Speech-to-Speech"):
            # NOTE(review): source="microphone" is the Gradio 3.x parameter;
            # Gradio 4+ renamed it to sources=["microphone"] — confirm the
            # installed Gradio version before changing.
            audio_input = gr.Audio(source="microphone", type="filepath", label="Speak Now")
            audio_response = gr.Textbox(label="AI Response")
            audio_button = gr.Button("Submit")
            audio_button.click(stt_to_tts, inputs=audio_input, outputs=audio_response)
    return app


if __name__ == "__main__":
    app = gradio_ui()
    app.launch()