# NOTE: page-scrape residue, not part of the program -- "Spaces: Runtime error" (repeated twice in the capture).
import gradio as gr | |
import json | |
import os | |
# Dataset configuration.
# image_root: root directory of the image files; every action is stored in
# its own subdirectory underneath it.
image_root = "forward_dynamics_qa_pairs_v5_w_newline"
# output_file: JSON file that collects the annotator's answers.
output_file = "v5_forward_dynamics_user_choices.json"

# Alternative (inverse dynamics) configuration, kept for reference:
# image_root = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/inverse_dynamics_qa_pairs_v5_w_newline"
# output_file = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/v5_inverse_dynamics_user_choices.json"

# Seed the results file with an empty JSON object on first run so that later
# reads always find a parseable file.
if not os.path.exists(output_file):
    with open(output_file, 'w') as results_file:
        results_file.write(json.dumps({}))
def load_action_images(root=None):
    """Map every action subdirectory to its sorted list of ``.jpg`` files.

    Args:
        root: Directory to scan. Defaults to the module-level ``image_root``.

    Returns:
        A dict ``{action_name: [image_file_name, ...]}``. Actions are
        inserted in sorted order and each file list is sorted ascending.
        Plain files directly under ``root`` are ignored. An empty dict is
        returned when ``root`` does not exist or is not a directory, so the
        caller can fall back to its "no actions" state instead of crashing.
    """
    if root is None:
        root = image_root
    if not os.path.isdir(root):
        return {}

    images_by_action = {}
    # os.listdir() returns entries in arbitrary order; sort so that the
    # first action shown to the annotator is the same on every run.
    for action in sorted(os.listdir(root)):
        action_dir = os.path.join(root, action)
        if os.path.isdir(action_dir):
            images_by_action[action] = sorted(
                name for name in os.listdir(action_dir) if name.endswith('.jpg')
            )
    return images_by_action
def load_user_choices(path=None):
    """Return the answers saved so far.

    Args:
        path: JSON file to read. Defaults to the module-level ``output_file``.

    Returns:
        The object parsed from the JSON file. An empty dict is returned when
        the file does not exist or contains only whitespace.

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON.
            This is deliberately not swallowed, so that a damaged results
            file is noticed rather than silently overwritten.
    """
    if path is None:
        path = output_file
    try:
        with open(path, 'r') as f:
            content = f.read()
    except FileNotFoundError:
        # The file can disappear after start-up; treat it as "no answers yet".
        return {}
    if not content.strip():
        return {}
    return json.loads(content)
def save_user_choice(action, image_name, choice, ground_truth):
    """Record one answer in the results file.

    The entry is stored under ``user_choices[action][<image name without
    extension>]`` and holds the submitted choice, the ground truth and
    whether the two are equal. An existing entry for the same image is
    overwritten.

    Args:
        action: Name of the action the image belongs to.
        image_name: File name of the image, including its extension.
        choice: The answer that was submitted.
        ground_truth: The expected answer.
    """
    image_key = os.path.splitext(image_name)[0]  # drop the ".jpg" extension
    user_choices = load_user_choices()
    user_choices.setdefault(action, {})[image_key] = {
        "choice": choice,
        "ground_truth": ground_truth,
        "is_correct": choice == ground_truth,
    }

    # Write to a sibling temp file and swap it in, so an interruption during
    # the write cannot leave the results file truncated.
    tmp_path = f"{output_file}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump(user_choices, f, indent=2)
    os.replace(tmp_path, output_file)
def _read_text_or_default(path, default):
    """Return the stripped contents of ``path``, or ``default`` if it is missing."""
    if not os.path.exists(path):
        return default
    with open(path, 'r') as f:
        return f.read().strip()


def get_content_at_index(action, index):
    """Collect everything needed to display one question.

    Args:
        action: Key into the module-level ``action_images`` mapping.
        index: Position of the image within that action's image list.

    Returns:
        A 6-tuple ``(image_path, image_name, text_prompt, ground_truth,
        enable_prev, enable_next)``.

        * ``text_prompt`` is the plain text of ``<image stem>.txt`` and
          ``ground_truth`` the plain text of ``<image stem>_answer.txt``;
          each falls back to a placeholder message when its file is missing.
          The prompt is returned unstyled -- callers wrap it in their own
          markup, so wrapping it here as well would nest the font-size div
          and compound the ``em`` scaling.
        * ``enable_prev`` / ``enable_next`` tell whether an image exists
          before / after ``index``.

        When ``action`` is unknown or ``index`` is out of range the result is
        ``(None, None, "No more images", "", False, False)``.
    """
    images = action_images.get(action)
    if images is None or index < 0 or index >= len(images):
        return None, None, "No more images", "", False, False

    image_name = images[index]
    image_path = os.path.join(image_root, action, image_name)

    # Strip only the trailing extension; a blanket str.replace(".jpg", ...)
    # would also rewrite ".jpg" occurring in a directory name.
    stem = os.path.splitext(image_path)[0]
    text_prompt = _read_text_or_default(stem + ".txt", "No text prompt available")
    ground_truth = _read_text_or_default(stem + "_answer.txt", "No ground truth available")

    # button states
    enable_prev = index > 0
    enable_next = index < len(images) - 1
    return image_path, image_name, text_prompt, ground_truth, enable_prev, enable_next
# navigate among questions
def navigate(action, index, direction):
    """Move to a neighbouring question of the current action.

    Args:
        action: Key into the module-level ``action_images`` mapping.
        index: Index of the question currently shown.
        direction: Offset added to ``index`` (the UI passes -1 or 1).

    Returns:
        A 7-tuple: image path, prompt wrapped in a font-size div, ground
        truth, an update clearing the ground-truth display, updates for the
        interactivity of the "Previous" and "Next" buttons, and the new index.
        The new index is clamped to the valid range of the action's images.
    """
    # An unknown action is treated as having no images instead of raising
    # KeyError, matching the guards used by the other handlers.
    last_index = len(action_images.get(action, [])) - 1
    new_index = max(0, min(index + direction, last_index))

    # retrieve context
    image_path, _, text_prompt, ground_truth, enable_prev, enable_next = get_content_at_index(action, new_index)

    # Apply font size styling to text_prompt
    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"
    return (
        image_path,
        styled_text_prompt,
        ground_truth,
        gr.update(value=""),
        gr.update(interactive=enable_prev),
        gr.update(interactive=enable_next),
        new_index,
    )
# Handle user choice submission
def submit_choice(action, index, choice, ground_truth):
    """Persist the submitted answer and build the feedback markup.

    Args:
        action: Key into the module-level ``action_images`` mapping.
        index: Index of the question being answered.
        choice: The answer that was submitted.
        ground_truth: The expected answer for the question.

    Returns:
        An HTML snippet showing the ground truth, coloured green when the
        choice equals it and red otherwise. If ``action`` is unknown or
        ``index`` is out of range, an error message is returned and nothing
        is saved.
    """
    is_valid = action in action_images and 0 <= index < len(action_images[action])
    if not is_valid:
        return "Invalid demo or keyframe index."

    # Save user choice
    save_user_choice(action, action_images[action][index], choice, ground_truth)

    color = "green" if choice == ground_truth else "red"
    return f'<div style="font-size: 1.25em; color:{color}">Ground Truth: {ground_truth}</div>'
def change_action(action):
    """Switch the interface to the first question of another action.

    Args:
        action: The action selected in the dropdown.

    Returns:
        An 8-tuple consumed, in order, by the dropdown's change handler
        outputs: image, prompt, ground-truth state, ground-truth display,
        "Previous" button, "Next" button, current action, current index.
        For an action missing from ``action_images`` placeholder texts are
        returned and both navigation buttons are disabled.
    """
    if action not in action_images:
        return (
            None,
            "No images available",
            "No text prompt available",
            "",
            gr.update(interactive=False),
            gr.update(interactive=False),
            action,
            0,
        )

    # Get the first image of the new action
    image_path, _, text_prompt, ground_truth, _, has_next = get_content_at_index(action, 0)

    # Apply font size styling to text_prompt
    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"

    return (
        image_path,
        styled_text_prompt,
        ground_truth,
        gr.update(value=""),  # clear the previous feedback
        gr.update(interactive=False),  # at the start, so no "Previous"
        gr.update(interactive=has_next),  # "Next" only if more images follow
        action,
        0,
    )
# initialize data
action_images = load_action_images()


def initialize_app():
    """Compute the values the interface starts with.

    Returns:
        An 8-tuple ``(image_path, image_name, text_prompt, ground_truth,
        prev_update, next_update, action, index)`` for the first image of the
        first action in ``action_images``. The "Previous" update is always
        non-interactive. When ``action_images`` is empty, placeholder values
        are returned with both buttons disabled and an empty action name.
    """
    if not action_images:
        return (
            None,
            None,
            "No actions available",
            "",
            gr.update(interactive=False),
            gr.update(interactive=False),
            "",
            0,
        )

    first_action = next(iter(action_images))
    image_path, image_name, text_prompt, ground_truth, _, has_next = get_content_at_index(first_action, 0)
    return (
        image_path,
        image_name,
        text_prompt,
        ground_truth,
        gr.update(interactive=False),  # "Previous" is forced off at start-up
        gr.update(interactive=has_next),
        first_action,
        0,
    )


(
    first_image,
    first_image_name,
    first_text_prompt,
    first_ground_truth,
    enable_prev,
    enable_next,
    first_action,
    first_index,
) = initialize_app()
# Gradio interface
def _step_handler(direction):
    """Build the click callback that moves ``direction`` questions away."""
    return lambda action, index: navigate(action, index, direction)


def _choice_handler(letter):
    """Build the click callback that submits ``letter`` as the answer."""
    return lambda action, index, gt: submit_choice(action, index, letter, gt)


with gr.Blocks() as app:
    gr.Markdown("# VLM Embodied Benchmark Human Annotation Interface")

    # states: action, index, ground truth
    current_action = gr.State(value=first_action)
    current_index = gr.State(value=first_index)
    current_ground_truth = gr.State(value=first_ground_truth)

    # UI components
    action_dropdown = gr.Dropdown(
        choices=list(action_images),
        value=first_action,
        label="Select Demo to Annotate",
    )
    # image = gr.Image(value=first_image, interactive=False, width=500)
    image = gr.Image(value=first_image, interactive=False, width=1500)
    text_prompt = gr.Markdown(value=f"<div style='font-size: 1.25em;'>{first_text_prompt}</div>")

    with gr.Row():  # nav buttons
        # "Previous" is explicitly disabled during initialization
        prev_button = gr.Button("Previous", interactive=False)
        next_button = gr.Button("Next", interactive=enable_next["interactive"])

    with gr.Row():  # choice buttons
        a_button = gr.Button("A")
        b_button = gr.Button("B")
        c_button = gr.Button("C")
        d_button = gr.Button("D")

    ground_truth_display = gr.Markdown(value="")

    # change action dropdown
    action_dropdown.change(
        fn=change_action,
        inputs=[action_dropdown],
        outputs=[
            image,
            text_prompt,
            current_ground_truth,
            ground_truth_display,
            prev_button,
            next_button,
            current_action,
            current_index,
        ],
    )

    # click on navigation buttons
    nav_input = [current_action, current_index]
    nav_output = [
        image,
        text_prompt,
        current_ground_truth,
        ground_truth_display,
        prev_button,
        next_button,
        current_index,
    ]
    for _nav_button, _step in ((prev_button, -1), (next_button, 1)):
        _nav_button.click(fn=_step_handler(_step), inputs=nav_input, outputs=nav_output)

    # click on choice buttons
    input_param = [current_action, current_index, current_ground_truth]
    output_param = [ground_truth_display]
    _choice_buttons = (("A", a_button), ("B", b_button), ("C", c_button), ("D", d_button))
    for _letter, _choice_button in _choice_buttons:
        _choice_button.click(fn=_choice_handler(_letter), inputs=input_param, outputs=output_param)

app.launch(share=True)