Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,382 Bytes
6f322bd e82b768 a4690cb e82b768 a4690cb 73d2daa b5b9453 a4690cb b5b9453 a4690cb 7d14b9f a4690cb 598dcfa c893ae0 598dcfa a4690cb 598dcfa a4690cb b5b9453 a4690cb b5b9453 a4690cb b5b9453 a4690cb 598dcfa b5b9453 598dcfa a4690cb b5b9453 a4690cb b5b9453 68fe4b2 a4690cb b5b9453 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import spaces
import gradio as gr
from agents import build_graph
# Build the graph once at import time so that every call to
# process_and_display reuses the same instance via graph.invoke().
graph = build_graph()
@spaces.GPU(duration=60)
def process_and_display(image, voices: list[str]) -> tuple[str, str]:
    """Caption an image and describe it in each selected voice persona.

    Args:
        image: The uploaded image as delivered by the Gradio image input
            (``None`` when the user has not uploaded anything).
        voices: Names of the voice personas chosen in the dropdown.

    Returns:
        A ``(caption, description)`` pair. ``caption`` is the graph's
        ``"caption"`` result; ``description`` is the graph's
        ``"descriptions"`` entries joined with a Markdown ``---`` separator.
        When an input is missing, both elements are user-facing messages
        explaining what to provide instead.
    """
    if not voices:  # If no voices selected
        return "Please select at least one voice persona.", "No voice personas selected."

    # Gradio hands over None when no image was uploaded; bail out with a
    # readable message instead of letting the graph fail on an empty input.
    if image is None:
        return "Please upload an image.", "No image provided."

    # Initial state handed to the graph; "caption" and "descriptions" start
    # empty and are read back from the result below.
    state = {"image": image, "voices": voices, "caption": "", "descriptions": []}

    # NOTE(review): max_concurrency=1 presumably makes the graph run its
    # branches one at a time — confirm against the graph library's config docs.
    result = graph.invoke(state, {"max_concurrency": 1})

    descriptions: list[str] = result["descriptions"]
    description = "\n\n---\n\n".join(descriptions)

    return result["caption"], description
def create_interface():
    """Assemble the Gradio Blocks UI and return it without launching it.

    Layout: a title and usage notes on top, then one row with two columns.
    The left column holds the image upload, the persona multi-select and the
    submit button; the right column shows the caption and the per-voice
    descriptions. Clicking the button calls ``process_and_display``.
    """
    persona_choices = [
        "scurvy-ridden pirate",
        "private investigator",
        "sarcastic teenager",
        "forgetful wizard",
        "shakespearian",
    ]
    # The first two personas are pre-selected.
    default_personas = persona_choices[:2]

    intro_text = (
        "\n"
        "This app takes an image and generates descriptions using selected voice personas.\n"
        "1. Upload an image\n"
        "2. Select voice personas from the multi-select dropdown\n"
        '3. Click "Generate Description" to see the results\n'
        "The descriptions will be generated in parallel for all selected voices.\n"
    )

    with gr.Blocks() as app:
        gr.Markdown("# Image Description with Voice Personas")
        gr.Markdown(intro_text)

        with gr.Row():
            # Left column: user inputs.
            with gr.Column():
                picture = gr.Image(type="pil", label="Upload an Image")
                persona_picker = gr.Dropdown(
                    choices=persona_choices,
                    label="Select Voice Personas (max 2 recommended)",
                    multiselect=True,
                    value=default_personas,
                )
                run_button = gr.Button("Generate Description")

            # Right column: generated outputs.
            with gr.Column():
                caption_box = gr.Textbox(label="Image Caption", lines=4)
                descriptions_view = gr.Markdown(label="Voice Descriptions")

        # Wire the button to the processing function.
        run_button.click(
            fn=process_and_display,
            inputs=[picture, persona_picker],
            outputs=[caption_box, descriptions_view],
        )

    return app
# Build the interface at import time so `demo` is available as a module-level
# name; only start the server when this file is executed as a script.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()