import spaces
import gradio as gr

from agents import build_graph

# Initialize the graph
graph = build_graph()


@spaces.GPU(duration=60)
def process_and_display(image, voices):
    if not voices:  # No voice personas selected
        return "Please select at least one voice persona.", "No voice personas selected."

    # Initialize state
    state = {"image": image, "voices": voices, "caption": "", "descriptions": []}

    # Run the graph
    result = graph.invoke(state, {"max_concurrency": 1})

    descriptions: list[str] = result["descriptions"]
    description = "\n\n---\n\n".join(descriptions)

    # Return the caption and description
    return result["caption"], description


def create_interface():
    with gr.Blocks() as demo:
        gr.Markdown("# Image Description with Voice Personas")
        gr.Markdown("""
        This app takes an image and generates descriptions using selected voice personas.

        1. Upload an image
        2. Select voice personas from the multi-select dropdown
        3. Click "Generate Description" to see the results

        The descriptions will be generated in parallel for all selected voices.
        """)

        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload an Image")
                voice_dropdown = gr.Dropdown(
                    choices=[
                        "scurvy-ridden pirate",
                        "private investigator",
                        "sarcastic teenager",
                        "forgetful wizard",
                        "shakespearian",
                    ],
                    label="Select Voice Personas (max 2 recommended)",
                    multiselect=True,
                    value=["scurvy-ridden pirate", "private investigator"],
                )
                submit_button = gr.Button("Generate Description")

            with gr.Column():
                caption_output = gr.Textbox(label="Image Caption", lines=4)
                description_output = gr.Markdown(label="Voice Descriptions")

        submit_button.click(
            fn=process_and_display,
            inputs=[image_input, voice_dropdown],
            outputs=[caption_output, description_output],
        )

    return demo


# Launch the app
demo = create_interface()

if __name__ == "__main__":
    demo.launch()