Spaces:

bryanzhou008
/

Test_Forward_Dynamics_Annotation

Runtime error

App Files Files Community

Test_Forward_Dynamics_Annotation / app.py

bryanzhou008

Create app.py

9fa9058 verified 2 months ago

raw

history blame contribute delete

8.28 kB

	import gradio as gr
	import json
	import os

	# Dataset layout: image_root holds one sub-directory per action, and the
	# question images for that action live inside it.
	# input filename
	image_root = "forward_dynamics_qa_pairs_v5_w_newline"
	# output filename
	output_file = "v5_forward_dynamics_user_choices.json"

	# Local-development paths for the inverse-dynamics variant, kept for reference:
	# image_root = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/inverse_dynamics_qa_pairs_v5_w_newline"
	# output_file = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/v5_inverse_dynamics_user_choices.json"


	# Seed the results file with an empty JSON object the first time the app runs,
	# so that later reads always find parseable JSON.
	if not os.path.exists(output_file):
	    with open(output_file, 'w') as f:
	        f.write(json.dumps({}))

	def load_action_images():
	    """Map every action sub-directory of ``image_root`` to its sorted ``.jpg`` files.

	    Returns:
	        dict[str, list[str]]: action name -> image file names in ascending
	        order. Actions are inserted in sorted order so the result does not
	        depend on the order in which the file system lists directories.
	        An empty dict is returned when ``image_root`` is not a directory.
	    """
	    # A missing dataset directory means "no actions" rather than a crash at
	    # import time; initialize_app() already has a path for an empty mapping.
	    if not os.path.isdir(image_root):
	        return {}

	    images_by_action = {}
	    # os.listdir() order is arbitrary, so sort to keep the action order stable.
	    for action in sorted(os.listdir(image_root)):
	        action_dir = os.path.join(image_root, action)
	        if os.path.isdir(action_dir):
	            images_by_action[action] = sorted(
	                name for name in os.listdir(action_dir) if name.endswith('.jpg')
	            )
	    return images_by_action

	def load_user_choices():
	    """Read ``output_file`` and return its parsed JSON content."""
	    with open(output_file, 'r') as results:
	        raw = results.read()
	    return json.loads(raw)

	def save_user_choice(action, image_name, choice, ground_truth):
	    """Store one answer in ``output_file`` under ``action`` / image stem.

	    Args:
	        action: Key of the outer mapping in the results file.
	        image_name: Image file name; its extension is stripped to form the
	            inner key.
	        choice: The answer that was selected.
	        ground_truth: The expected answer; compared with ``choice`` for
	            equality to fill ``is_correct``.

	    The whole results file is re-read and rewritten on every call; an
	    existing entry for the same image is overwritten.
	    """
	    stem, _ext = os.path.splitext(image_name)
	    all_choices = load_user_choices()

	    record = {
	        "choice": choice,
	        "ground_truth": ground_truth,
	        "is_correct": choice == ground_truth,
	    }
	    # Create the per-action mapping on first use, then set/replace the entry.
	    all_choices.setdefault(action, {})[stem] = record

	    with open(output_file, 'w') as f:
	        json.dump(all_choices, f, indent=2)

	def get_content_at_index(action, index):
	    """Return everything needed to display image ``index`` of ``action``.

	    Args:
	        action: Key into the module-level ``action_images`` mapping.
	        index: Position in that action's image list.

	    Returns:
	        A 6-tuple ``(image_path, image_name, text_prompt, ground_truth,
	        enable_prev, enable_next)``. ``text_prompt`` is the content of the
	        ``<stem>.txt`` file wrapped in a font-size ``<div>``, and
	        ``ground_truth`` is the content of ``<stem>_answer.txt``; each falls
	        back to a placeholder string when its file is missing. For an unknown
	        action or an out-of-range index the result is
	        ``(None, None, "No more images", "", False, False)``.
	    """
	    if action not in action_images or index < 0 or index >= len(action_images[action]):
	        return None, None, "No more images", "", False, False

	    # image
	    image_name = action_images[action][index]
	    image_path = os.path.join(image_root, action, image_name)

	    # Sidecar files share the image's stem. splitext only touches the final
	    # extension, whereas str.replace(".jpg", ...) would also rewrite any
	    # ".jpg" that happens to appear earlier in the path.
	    stem = os.path.splitext(image_path)[0]

	    # text prompt
	    text_prompt = _read_text_or_default(stem + ".txt", "No text prompt available")

	    # Apply font size styling to text_prompt
	    # NOTE(review): navigate() and change_action() wrap the returned prompt in
	    # the same <div> again, so the styling ends up nested - confirm whether
	    # that is intended.
	    text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"

	    # ground truth
	    ground_truth = _read_text_or_default(stem + "_answer.txt", "No ground truth available")

	    # button states
	    enable_prev = index > 0
	    enable_next = index < len(action_images[action]) - 1

	    return image_path, image_name, text_prompt, ground_truth, enable_prev, enable_next


	def _read_text_or_default(path, default):
	    """Return the stripped content of ``path``, or ``default`` if it does not exist."""
	    try:
	        # The context manager closes the handle promptly instead of leaving
	        # it to the garbage collector.
	        with open(path, 'r') as handle:
	            return handle.read().strip()
	    except FileNotFoundError:
	        return default


	# navigate among questions
	def navigate(action, index, direction):
	    """Move ``direction`` steps from ``index`` within ``action`` and return the new view.

	    Args:
	        action: Key into the module-level ``action_images`` mapping.
	        index: Current position in that action's image list.
	        direction: Offset to apply (the buttons pass ``-1`` or ``1``).

	    Returns:
	        A 7-tuple ``(image_path, styled_text_prompt, ground_truth,
	        cleared-display update, prev-button update, next-button update,
	        new_index)``. The new index is clamped to the valid range, with a
	        lower bound of 0.
	    """
	    # .get() keeps an unknown action from raising KeyError here; the lookup
	    # below then returns its "No more images" placeholder tuple, matching how
	    # the other handlers treat an unknown action.
	    image_count = len(action_images.get(action, ()))

	    # update index
	    new_index = max(0, min(index + direction, image_count - 1))
	    # retrieve context
	    image_path, _, text_prompt, ground_truth, enable_prev, enable_next = get_content_at_index(action, new_index)

	    # Apply font size styling to text_prompt
	    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"

	    return (
	        image_path,
	        styled_text_prompt,
	        ground_truth,
	        gr.update(value=""),  # hide the previously revealed ground truth
	        gr.update(interactive=enable_prev),
	        gr.update(interactive=enable_next),
	        new_index,
	    )


	# Handle user choice submission
	def submit_choice(action, index, choice, ground_truth):
	    """Save the selected answer and return HTML that reveals the ground truth.

	    Args:
	        action: Key into the module-level ``action_images`` mapping.
	        index: Position of the current image within that action.
	        choice: The answer that was selected.
	        ground_truth: The expected answer for the current image.

	    Returns:
	        An HTML snippet showing the ground truth, coloured green when
	        ``choice`` equals it and red otherwise; or a plain error message when
	        the action is unknown or the index is out of range (nothing is saved
	        in that case).
	    """
	    position_is_valid = action in action_images and 0 <= index < len(action_images[action])
	    if not position_is_valid:
	        return "Invalid demo or keyframe index."

	    # Save user choice
	    save_user_choice(action, action_images[action][index], choice, ground_truth)

	    color = "green" if choice == ground_truth else "red"
	    return f'<div style="font-size: 1.25em; color:{color}">Ground Truth: {ground_truth}</div>'



	def change_action(action):
	    """Switch to ``action`` and show its first image.

	    Returns:
	        An 8-tuple ``(image_path, styled_text_prompt, ground_truth,
	        cleared-display update, prev-button update, next-button update,
	        action, 0)``. "Previous" is always disabled because the view starts
	        at index 0; "Next" follows what ``get_content_at_index`` reports.
	        For an action missing from ``action_images`` the tuple carries a
	        ``None`` image, placeholder strings and both buttons disabled.
	    """
	    if action in action_images:
	        # Get the first image of the new action; the image name and the
	        # "previous" flag are not needed here.
	        image_path, _, text_prompt, ground_truth, _, has_next = get_content_at_index(action, 0)

	        return (
	            image_path,
	            # Apply font size styling to text_prompt
	            f"<div style='font-size: 1.25em;'>{text_prompt}</div>",
	            ground_truth,
	            gr.update(value=""),
	            gr.update(interactive=False),  # at the start, so no "Previous"
	            gr.update(interactive=has_next),
	            action,
	            0,
	        )

	    return (
	        None,
	        "No images available",
	        "No text prompt available",
	        "",
	        gr.update(interactive=False),
	        gr.update(interactive=False),
	        action,
	        0,
	    )

	# initialize data
	# Built once when the module is loaded; the handler functions in this file
	# read this module-level mapping of action name -> image file names.
	action_images = load_action_images()

	def initialize_app():
	    """Compute the values the interface starts with.

	    Returns:
	        An 8-tuple ``(image_path, image_name, text_prompt, ground_truth,
	        prev-button update, next-button update, first_action, 0)`` for the
	        first image of the first action in ``action_images``. When there are
	        no actions the tuple carries ``None`` for the image fields, the text
	        "No actions available", both buttons disabled and an empty action.
	    """
	    if action_images:
	        first_action = next(iter(action_images))
	        image_path, image_name, text_prompt, ground_truth, _, has_next = get_content_at_index(first_action, 0)

	        return (
	            image_path,
	            image_name,
	            text_prompt,
	            ground_truth,
	            # Force the Previous button to be disabled during initialization
	            gr.update(interactive=False),
	            gr.update(interactive=has_next),
	            first_action,
	            0,
	        )

	    return None, None, "No actions available", "", gr.update(interactive=False), gr.update(interactive=False), "", 0

	# Initial values for the interface, computed once at import time.
	# enable_prev / enable_next hold gr.update(...) results here, not booleans.
	# In the code visible in this file, first_image_name and enable_prev are not
	# read again after this line.
	first_image, first_image_name, first_text_prompt, first_ground_truth, enable_prev, enable_next, first_action, first_index = initialize_app()

	# Gradio interface
	def _step_handler(direction):
	    """Build the click callback that moves ``direction`` images from the current one."""
	    return lambda action, index: navigate(action, index, direction)


	def _choice_handler(letter):
	    """Build the click callback that submits ``letter`` as the selected answer."""
	    return lambda action, index, gt: submit_choice(action, index, letter, gt)


	with gr.Blocks() as app:
	    gr.Markdown("# VLM Embodied Benchmark Human Annotation Interface")

	    # states: action, index, ground truth
	    current_action = gr.State(value=first_action)
	    current_index = gr.State(value=first_index)
	    current_ground_truth = gr.State(value=first_ground_truth)

	    # UI components
	    action_dropdown = gr.Dropdown(
	        choices=list(action_images.keys()),
	        value=first_action,
	        label="Select Demo to Annotate",
	    )
	    # An earlier revision used width=500 for the image.
	    image = gr.Image(value=first_image, interactive=False, width=1500)
	    text_prompt = gr.Markdown(value=f"<div style='font-size: 1.25em;'>{first_text_prompt}</div>")

	    # nav buttons
	    with gr.Row():
	        # "Previous" is explicitly disabled at start-up.
	        prev_button = gr.Button("Previous", interactive=False)
	        next_button = gr.Button("Next", interactive=enable_next["interactive"])

	    # choice buttons
	    with gr.Row():
	        a_button = gr.Button("A")
	        b_button = gr.Button("B")
	        c_button = gr.Button("C")
	        d_button = gr.Button("D")

	    ground_truth_display = gr.Markdown(value="")

	    # change action dropdown
	    action_dropdown.change(
	        fn=change_action,
	        inputs=[action_dropdown],
	        outputs=[
	            image,
	            text_prompt,
	            current_ground_truth,
	            ground_truth_display,
	            prev_button,
	            next_button,
	            current_action,
	            current_index,
	        ],
	    )

	    # click on navigation buttons
	    nav_input = [current_action, current_index]
	    nav_output = [
	        image,
	        text_prompt,
	        current_ground_truth,
	        ground_truth_display,
	        prev_button,
	        next_button,
	        current_index,
	    ]
	    prev_button.click(fn=_step_handler(-1), inputs=nav_input, outputs=nav_output)
	    next_button.click(fn=_step_handler(1), inputs=nav_input, outputs=nav_output)

	    # click on choice buttons: all four share inputs/outputs and differ only
	    # in the letter they submit.
	    input_param = [current_action, current_index, current_ground_truth]
	    output_param = [ground_truth_display]
	    for letter, button in (("A", a_button), ("B", b_button), ("C", c_button), ("D", d_button)):
	        button.click(fn=_choice_handler(letter), inputs=input_param, outputs=output_param)

	app.launch(share=True)