import gradio as gr


def respond(audio_input):
    """Turn a recorded utterance into a text reply and a spoken reply.

    Args:
        audio_input: The value Gradio passes from the microphone ``gr.Audio``
            input component. It is forwarded untouched to
            ``transcribe_audio``.
            NOTE(review): the component below does not set ``type=``, so
            confirm ``transcribe_audio`` accepts Gradio's default audio
            payload rather than a file path.

    Returns:
        A ``(text_response, output_path)`` tuple, exactly as produced by
        ``generate_response``: the reply text and the path of the generated
        audio file. The two values map, in order, onto the interface's text
        and audio output components.
    """
    # transcribe_audio / generate_response are not defined in this snippet;
    # they are expected to be in scope from elsewhere in the project.
    user_input = transcribe_audio(audio_input)
    text_response, output_path = generate_response(user_input)
    return text_response, output_path


# Microphone-only recorder with a custom-coloured waveform and no
# playback controls.
input_audio = gr.Audio(
    sources=["microphone"],
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)

# respond() returns two values, so two output components are declared:
# the reply text and the synthesized voice clip. With a single "text"
# output the audio path was never played back to the user.
demo = gr.Interface(
    fn=respond,
    inputs=input_audio,
    outputs=["text", "audio"],
    title="Tommy Vercetti Chatbot",
    description="Chat with Tommy Vercetti from GTA Vice City. Get responses in both text and voice!",
)

demo.launch(debug=True)