Spaces:

bryanzhou008
/

Test_Forward_Dynamics_Annotation

Runtime error

App Files Files Community

bryanzhou008 committed on Jan 16

Commit

9fa9058

verified ·

1 Parent(s): 68080e2

Create app.py

Browse files

Files changed (1) hide show

app.py +216 -0

app.py ADDED Viewed

	@@ -0,0 +1,216 @@

+import gradio as gr
+import json
+import os
+# root directory of image files
+# each action should be saved under subdirectories
+# input filename
+image_root = "forward_dynamics_qa_pairs_v5_w_newline"
+# output filename
+output_file = "v5_forward_dynamics_user_choices.json"
+# # input filename
+# image_root = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/inverse_dynamics_qa_pairs_v5_w_newline"
+# # output filename
+# output_file = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/v5_inverse_dynamics_user_choices.json"
+if not os.path.exists(output_file):
+    with open(output_file, 'w') as f:
+        json.dump({}, f)
+def load_action_images():
+    action_images = {}
+    for action in os.listdir(image_root):
+        action_dir = os.path.join(image_root, action)
+        if os.path.isdir(action_dir):
+            images = [f for f in os.listdir(action_dir) if f.endswith('.jpg')]
+            images.sort()  # Ensure files are sorted in ascending order
+            action_images[action] = images
+    return action_images
+def load_user_choices():
+    with open(output_file, 'r') as f:
+        return json.load(f)
+def save_user_choice(action, image_name, choice, ground_truth):
+    image_name_no_ext = os.path.splitext(image_name)[0]  # remove ".jpg" ext
+    user_choices = load_user_choices()
+    is_correct = (choice == ground_truth)
+    # save result
+    if action not in user_choices:
+        user_choices[action] = {}
+    user_choices[action][image_name_no_ext] = {
+        "choice": choice,
+        "ground_truth": ground_truth,
+        "is_correct": is_correct
+    }
+    with open(output_file, 'w') as f:
+        json.dump(user_choices, f, indent=2)
+def get_content_at_index(action, index):
+    if action not in action_images or index < 0 or index >= len(action_images[action]):
+        return None, None, "No more images", "", False, False
+    # image
+    image_name = action_images[action][index]
+    image_path = os.path.join(image_root, action, image_name)
+    # text prompt
+    text_prompt_path = image_path.replace(".jpg", ".txt")
+    text_prompt = (
+        open(text_prompt_path, 'r').read().strip()
+        if os.path.exists(text_prompt_path)
+        else "No text prompt available"
+    )
+    # Apply font size styling to text_prompt
+    text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"
+    # ground truth
+    ground_truth_path = image_path.replace(".jpg", "_answer.txt")
+    ground_truth = (
+        open(ground_truth_path, 'r').read().strip()
+        if os.path.exists(ground_truth_path)
+        else "No ground truth available"
+    )
+    # button states
+    enable_prev = index > 0
+    enable_next = index < len(action_images[action]) - 1
+    return image_path, image_name, text_prompt, ground_truth, enable_prev, enable_next
+# navigate among questions
+def navigate(action, index, direction):
+    # update index
+    new_index = max(0, min(index + direction, len(action_images[action]) - 1))
+    # retrieve context
+    image_path, _, text_prompt, ground_truth, enable_prev, enable_next = get_content_at_index(action, new_index)
+    # Apply font size styling to text_prompt
+    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"
+    return (
+        image_path,
+        styled_text_prompt,
+        ground_truth,
+        gr.update(value=""),
+        gr.update(interactive=enable_prev),
+        gr.update(interactive=enable_next),
+        new_index
+    )
+# Handle user choice submission
+def submit_choice(action, index, choice, ground_truth):
+    if action not in action_images or index < 0 or index >= len(action_images[action]):
+        return "Invalid demo or keyframe index."
+    image_name = action_images[action][index]
+    save_user_choice(action, image_name, choice, ground_truth)  # Save user choice
+    if choice == ground_truth:
+        color = "green"
+    else:
+        color = "red"
+    return f'<div style="font-size: 1.25em; color:{color}">Ground Truth: {ground_truth}</div>'
+def change_action(action):
+    if action not in action_images:
+        return None, "No images available", "No text prompt available", "", gr.update(interactive=False), gr.update(interactive=False), action, 0
+    # Get the first image of the new action
+    image_path, image_name, text_prompt, ground_truth, enable_prev, enable_next = get_content_at_index(action, 0)
+    # Apply font size styling to text_prompt
+    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"
+    # Reset states
+    enable_prev = gr.update(interactive=False)  # Disable "Previous" as we're at the start
+    enable_next = gr.update(interactive=enable_next)  # Enable "Next" if there are more images
+    return image_path, styled_text_prompt, ground_truth, gr.update(value=""), enable_prev, enable_next, action, 0
+# initialize data: scan image_root once when the module is loaded; the
+# functions in this file read this module-level index (action -> image names)
+action_images = load_action_images()
+def initialize_app():
+    if not action_images:
+        return None, None, "No actions available", "", gr.update(interactive=False), gr.update(interactive=False), "", 0
+    first_action = list(action_images.keys())[0]
+    image_path, image_name, text_prompt, ground_truth, enable_prev, enable_next = get_content_at_index(first_action, 0)
+    # Force the Previous button to be disabled during initialization
+    enable_prev = gr.update(interactive=False)
+    return image_path, image_name, text_prompt, ground_truth, enable_prev, gr.update(interactive=enable_next), first_action, 0
+# Initial values for the components created below. first_image_name and
+# enable_prev are unpacked but not used in the code that follows.
+first_image, first_image_name, first_text_prompt, first_ground_truth, enable_prev, enable_next, first_action, first_index = initialize_app()
+# Gradio interface
+# Components are declared in the order in which they appear in this block,
+# followed by the event wiring for the dropdown, the navigation buttons and
+# the four answer buttons.
+with gr.Blocks() as app:
+    gr.Markdown("# VLM Embodied Benchmark Human Annotation Interface")
+    # states: action, index, ground truth
+    # (values handed to the handlers as inputs and updated through their outputs)
+    current_action = gr.State(value=first_action)
+    current_index = gr.State(value=first_index)
+    current_ground_truth = gr.State(value=first_ground_truth)
+    # UI components
+    action_dropdown = gr.Dropdown(choices=list(action_images.keys()), value=first_action, label="Select Demo to Annotate")
+    # image = gr.Image(value=first_image, interactive=False, width=500)
+    image = gr.Image(value=first_image, interactive=False, width=1500)
+    # NOTE(review): when an image was found, first_text_prompt is already
+    # wrapped in the same font-size div by get_content_at_index, so the
+    # styling is nested here - confirm whether that is intended.
+    text_prompt = gr.Markdown(value=f"<div style='font-size: 1.25em;'>{first_text_prompt}</div>")
+    with gr.Row(): # nav buttons
+        prev_button = gr.Button("Previous", interactive=False)  # Explicitly disabled during initialization
+        # enable_next is the value returned by gr.update(interactive=...);
+        # this lookup assumes it can be indexed by "interactive" - TODO
+        # confirm for the installed Gradio version.
+        next_button = gr.Button("Next", interactive=enable_next["interactive"])
+    with gr.Row(): # choice buttons
+        a_button = gr.Button("A")
+        b_button = gr.Button("B")
+        c_button = gr.Button("C")
+        d_button = gr.Button("D")
+    # feedback area: filled by submit_choice, reset to "" by navigate / change_action
+    ground_truth_display = gr.Markdown(value="")
+    # change action dropdown
+    # (the outputs list must stay in the order of change_action's return values)
+    action_dropdown.change(
+        fn=change_action,
+        inputs=[action_dropdown],
+        outputs=[image, text_prompt, current_ground_truth, ground_truth_display, prev_button, next_button, current_action, current_index]
+    )
+    # click on navigation buttons
+    # (nav_output must stay in the order of navigate's return values)
+    nav_input = [current_action, current_index]
+    nav_output = [image, text_prompt, current_ground_truth, ground_truth_display, prev_button, next_button, current_index]
+    prev_button.click(
+        fn=lambda action, index: navigate(action, index, -1),
+        inputs=nav_input, outputs=nav_output
+    )
+    next_button.click(
+        fn=lambda action, index: navigate(action, index, 1),
+        inputs=[current_action, current_index],
+        outputs=nav_output
+    )
+    # click on choice buttons
+    # (each button passes its own letter to submit_choice as the choice)
+    input_param = [current_action, current_index, current_ground_truth]
+    output_param= [ground_truth_display]
+    a_button.click(
+        fn=lambda action, index, gt: submit_choice(action, index, "A", gt),
+        inputs=input_param, outputs=output_param)
+    b_button.click(
+        fn=lambda action, index, gt: submit_choice(action, index, "B", gt),
+        inputs=input_param, outputs=output_param)
+    c_button.click(
+        fn=lambda action, index, gt: submit_choice(action, index, "C", gt),
+        inputs=input_param, outputs=output_param)
+    d_button.click(
+        fn=lambda  action, index, gt: submit_choice(action, index, "D", gt),
+        inputs=input_param, outputs=output_param)
+# start the app; share=True asks Gradio for a public share link
+app.launch(share=True)