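"""Gradio app: caption an uploaded image and describe it in one or more
selected voice personas using the agent graph built by agents.build_graph()."""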
import spaces
import gradio as gr
from agents import build_graph
# Initialize the graph
graph = build_graph()
@spaces.GPU(duration=60)
def process_and_display(image, voices):
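    """Run the agent graph on the uploaded image for the selected voice personas.

    Returns the base image caption and the persona descriptions joined with
    markdown separators.
    """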
    if not voices:  # If no voices selected
        return "Please select at least one voice persona.", "No voice personas selected."

    # Initialize state
    state = {"image": image, "voices": voices, "caption": "", "descriptions": []}

    # Run the graph
    result = graph.invoke(state, {"max_concurrency": 1})

    descriptions: list[str] = result["descriptions"]
    description = "\n\n---\n\n".join(descriptions)

    # Return the caption and description
    return result["caption"], description
def create_interface():
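    """Build the Gradio Blocks UI: image upload, persona multi-select, and
    outputs for the caption and the persona descriptions."""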
    with gr.Blocks() as demo:
        gr.Markdown("# Image Description with Voice Personas")
        gr.Markdown("""
        This app takes an image and generates descriptions using selected voice personas.

        1. Upload an image
        2. Select voice personas from the multi-select dropdown
        3. Click "Generate Description" to see the results

        The descriptions will be generated in parallel for all selected voices.
        """)

        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload an Image")
                voice_dropdown = gr.Dropdown(
                    choices=[
                        "scurvy-ridden pirate",
                        "private investigator",
                        "sarcastic teenager",
                        "forgetful wizard",
                        "shakespearean",
                    ],
                    label="Select Voice Personas (max 2 recommended)",
                    multiselect=True,
                    value=["scurvy-ridden pirate", "private investigator"],
                )
                submit_button = gr.Button("Generate Description")

            with gr.Column():
                caption_output = gr.Textbox(label="Image Caption", lines=4)
                description_output = gr.Markdown(label="Voice Descriptions")

        submit_button.click(
            fn=process_and_display,
            inputs=[image_input, voice_dropdown],
            outputs=[caption_output, description_output],
        )

    return demo
# Launch the app
demo = create_interface()
if __name__ == "__main__":
    demo.launch()