import gradio as gr


def respond(audio_input):
    """Turn a recorded user message into the chatbot's text and voice reply.

    Args:
        audio_input: The value supplied by the audio input component. It is
            forwarded unchanged to ``transcribe_audio``; the exact format that
            helper expects is not visible here -- TODO confirm against it.
            ``None`` is treated as "nothing was recorded".

    Returns:
        A ``(text_response, output_path)`` pair as produced by
        ``generate_response``. ``output_path`` is presumably the path of the
        synthesized audio file -- verify against ``generate_response``. When
        there is no input, a short notice and ``None`` are returned instead.
    """
    # Nothing recorded: bail out before calling the transcription helper,
    # keeping the same two-value shape as the normal return.
    if audio_input is None:
        return "No audio received. Please record a message first.", None

    user_input = transcribe_audio(audio_input)
    text_response, output_path = generate_response(user_input)

    # The reply audio used to be decoded here only to compute a duration that
    # was never used; that dead work has been dropped.
    return text_response, output_path

# Microphone-only recorder used as the chatbot's input component.
input_audio = gr.Audio(
    sources=["microphone"],
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)

# `respond` returns two values (reply text, reply audio path), so one output
# component is declared per value. With only "text" declared, the second
# value had no component to land in and the voice reply was never shown.
gr.Interface(
    fn=respond,
    inputs=input_audio,
    outputs=["text", "audio"],
    title="Tommy Vercetti Chatbot",
    description="Chat with Tommy Vercetti from GTA Vice City. Get responses in both text and voice!",
).launch(debug=True)