import gradio as gr

from agents import build_graph
import spaces

# Build the processing graph once at import time; process_and_display()
# reuses this single module-level instance for every request.
graph = build_graph()


@spaces.GPU(duration=60)
def _run_graph(image, voice):
    """Run the module-level graph on one image/voice pair.

    This is the only part that needs the GPU, so it alone carries the
    ``spaces.GPU`` decorator.

    Returns:
        The ``(caption, description)`` values read from the graph result.
    """
    # The graph receives the inputs plus empty placeholders for the fields
    # it is expected to fill in.
    state = {"image": image, "voice": voice, "caption": "", "description": ""}
    result = graph.invoke(state)
    return result["caption"], result["description"]


def process_and_display(image, voice):
    """Generate the caption and persona description for an uploaded image.

    Args:
        image: Value of the image input (configured with ``type="pil"`` in
            ``create_interface``); ``None`` when nothing has been uploaded.
        voice: Persona name selected in the dropdown.

    Returns:
        A ``(caption, description)`` pair, in the order of the two output
        textboxes wired up in ``create_interface``.

    Raises:
        gr.Error: If no image was provided. Gradio shows the message to the
            user instead of an opaque failure from inside the graph.
    """
    # Validate before entering the GPU-decorated helper so a click without
    # an upload fails fast and does not ask for a GPU it cannot use.
    if image is None:
        raise gr.Error("Please upload an image before generating a description.")

    return _run_graph(image, voice)


def create_interface():
    """Assemble the Gradio Blocks UI and return it without launching it.

    The page has a title and usage notes, followed by one row with two
    columns: the first holds the inputs (image upload, persona dropdown and
    submit button), the second holds the caption and description textboxes.
    Clicking the button calls ``process_and_display`` with the image and the
    selected persona and writes its two return values to the textboxes.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    personas = [
        "scurvy-ridden pirate",
        "forgetful wizard",
        "sarcastic teenager",
    ]
    usage_notes = """
        This app takes an image and generates a description using a selected voice persona.
        
        1. Upload an image
        2. Select a voice persona from the dropdown
        3. Click "Generate Description" to see the results
        """

    with gr.Blocks() as app:
        gr.Markdown("# Image Description with Voice Personas")
        gr.Markdown(usage_notes)

        with gr.Row():
            # First column: everything the user provides.
            with gr.Column():
                picture = gr.Image(label="Upload an Image", type="pil")
                persona = gr.Dropdown(
                    label="Select a Voice",
                    choices=personas,
                    value=personas[0],  # first persona is preselected
                )
                generate = gr.Button(value="Generate Description")

            # Second column: where the results are shown.
            with gr.Column():
                caption_box = gr.Textbox(label="Image Caption")
                description_box = gr.Textbox(label="Voice Description")

        # Output order must match the (caption, description) return order
        # of process_and_display.
        generate.click(
            process_and_display,
            [picture, persona],
            [caption_box, description_box],
        )

    return app


# Build the UI at import time so `demo` exists as a module-level attribute
# even when this file is imported rather than executed.
# NOTE(review): presumably the hosting environment / gradio tooling looks up
# `demo` by name — confirm before renaming or moving it under the guard.
demo = create_interface()

# Only start the server when this file is run directly as a script.
if __name__ == "__main__":
    demo.launch()