"""Gradio app that captions an image and rewrites the caption in a chosen voice.

The heavy lifting is delegated to the graph returned by
``agents.build_graph``; this module only wires that graph to a web UI.
"""

# Third-party imports first, then project-local ones.
# NOTE(review): this ordering also loads `spaces` before `agents`, which is
# presumably what ZeroGPU wants if `agents` initializes CUDA on import -- confirm.
import gradio as gr
import spaces

from agents import build_graph

# Build the graph once at import time so every request reuses it.
graph = build_graph()


@spaces.GPU(duration=60)
def process_and_display(image, voice):
    """Run the graph on one image/voice pair and return its two text outputs.

    Args:
        image: The uploaded image as delivered by the ``gr.Image`` input
            (configured with ``type="pil"``), or ``None`` if nothing was
            uploaded.
        voice: The persona selected in the dropdown.

    Returns:
        A ``(caption, description)`` tuple read from the graph result.

    Raises:
        gr.Error: If no image was provided. Gradio shows the message to the
            user instead of surfacing an opaque failure from inside the graph.
    """
    # The button can be clicked before anything is uploaded; fail early with
    # a readable message rather than handing ``None`` to the graph.
    if image is None:
        raise gr.Error("Please upload an image before generating a description.")

    # The graph fills in "caption" and "description"; seed them as empty.
    initial_state = {
        "image": image,
        "voice": voice,
        "caption": "",
        "description": "",
    }
    result = graph.invoke(initial_state)
    return result["caption"], result["description"]


def create_interface():
    """Build and return the ``gr.Blocks`` UI (does not launch it).

    Layout: a left column with the image upload, persona dropdown and submit
    button; a right column with the caption and description text boxes.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Image Description with Voice Personas")
        gr.Markdown("""
        This app takes an image and generates a description using a selected voice persona.

        1. Upload an image
        2. Select a voice persona from the dropdown
        3. Click "Generate Description" to see the results
        """)

        with gr.Row():
            # Inputs.
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload an Image")
                voice_dropdown = gr.Dropdown(
                    choices=[
                        "scurvy-ridden pirate",
                        "forgetful wizard",
                        "sarcastic teenager",
                    ],
                    label="Select a Voice",
                    value="scurvy-ridden pirate",
                )
                submit_button = gr.Button("Generate Description")

            # Outputs.
            with gr.Column():
                caption_output = gr.Textbox(label="Image Caption")
                description_output = gr.Textbox(label="Voice Description")

        submit_button.click(
            fn=process_and_display,
            inputs=[image_input, voice_dropdown],
            outputs=[caption_output, description_output],
        )

    return demo


# Kept at module level so `demo` is importable by name, not only when this
# file is run as a script.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()