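"""Gradio app: caption an uploaded image and describe it in one or more
selected voice personas using the agent graph built by agents.build_graph()."""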
import spaces
import gradio as gr
from agents import build_graph
# Initialize the graph
graph = build_graph()
@spaces.GPU(duration=60)
def process_and_display(image, voices):
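    """Run the agent graph on the uploaded image for the selected voice personas.

    Returns the base image caption and the persona descriptions joined with
    markdown separators.
    """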
    if not voices:  # If no voices selected
        return "Please select at least one voice persona.", "No voice personas selected."

    # Initialize state
    state = {"image": image, "voices": voices, "caption": "", "descriptions": []}

    # Run the graph
    result = graph.invoke(state, {"max_concurrency": 1})

    descriptions: list[str] = result["descriptions"]
    description = "\n\n---\n\n".join(descriptions)

    # Return the caption and description
    return result["caption"], description
def create_interface():
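    """Build the Gradio Blocks UI: image upload, persona multi-select, and
    outputs for the caption and the persona descriptions."""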
    with gr.Blocks() as demo:
        gr.Markdown("# Image Description with Voice Personas")
        gr.Markdown("""
        This app takes an image and generates descriptions using selected voice personas.

        1. Upload an image
        2. Select voice personas from the multi-select dropdown
        3. Click "Generate Description" to see the results

        The descriptions will be generated in parallel for all selected voices.
        """)

        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload an Image")
                voice_dropdown = gr.Dropdown(
                    choices=[
                        "scurvy-ridden pirate",
                        "private investigator",
                        "sarcastic teenager",
                        "forgetful wizard",
                        "shakespearean",
                    ],
                    label="Select Voice Personas (max 2 recommended)",
                    multiselect=True,
                    value=["scurvy-ridden pirate", "private investigator"],
                )
                submit_button = gr.Button("Generate Description")

            with gr.Column():
                caption_output = gr.Textbox(label="Image Caption", lines=4)
                description_output = gr.Markdown(label="Voice Descriptions")

        submit_button.click(
            fn=process_and_display,
            inputs=[image_input, voice_dropdown],
            outputs=[caption_output, description_output],
        )

    return demo
# Launch the app
demo = create_interface()
if __name__ == "__main__":
    demo.launch()