Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
|
5 |
+
# Dataset location. `image_root` is the root directory of the image files;
# each action is stored in its own subdirectory beneath it.
image_root = "forward_dynamics_qa_pairs_v5_w_newline"

# File that collects the annotators' answers.
output_file = "v5_forward_dynamics_user_choices.json"

# Alternative input/output pair (inverse-dynamics set), kept for reference:
# image_root = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/inverse_dynamics_qa_pairs_v5_w_newline"
# output_file = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/v5_inverse_dynamics_user_choices.json"

# First run: seed the answers file with an empty JSON object so that it can
# be read back as JSON before any answer has been recorded.
if not os.path.exists(output_file):
    with open(output_file, 'w') as results_file:
        results_file.write(json.dumps({}))
|
21 |
+
|
22 |
+
def load_action_images(root=None):
    """Index the question images available for every action.

    Args:
        root: Directory that holds one subdirectory per action. When omitted,
            the module-level ``image_root`` is used.

    Returns:
        A dict mapping each action subdirectory name to the sorted list of
        ``.jpg`` file names found directly inside it. Actions are inserted in
        sorted order. A subdirectory without ``.jpg`` files maps to an empty
        list, and plain files directly under ``root`` are ignored. An empty
        dict is returned when ``root`` is not an existing directory.
    """
    if root is None:
        root = image_root

    # A missing dataset directory yields an empty index instead of raising
    # while the module is still being imported.
    if not os.path.isdir(root):
        return {}

    action_images = {}
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # action order (and therefore the first action shown) deterministic.
    for action in sorted(os.listdir(root)):
        action_dir = os.path.join(root, action)
        if os.path.isdir(action_dir):
            action_images[action] = sorted(
                name for name in os.listdir(action_dir) if name.endswith('.jpg')
            )
    return action_images
|
31 |
+
|
32 |
+
def load_user_choices():
    """Read back the answers recorded so far.

    Returns:
        The JSON value stored in ``output_file``.
    """
    with open(output_file, 'r') as saved:
        recorded = json.load(saved)
    return recorded
|
35 |
+
|
36 |
+
def save_user_choice(action, image_name, choice, ground_truth):
    """Record one answer and rewrite the results file.

    The entry is stored under ``[action][<image name without extension>]``
    and replaces any earlier answer for the same image.

    Args:
        action: Action name the image belongs to.
        image_name: Image file name, including its extension.
        choice: Answer picked by the annotator.
        ground_truth: Expected answer for the image.
    """
    stem, _extension = os.path.splitext(image_name)
    all_choices = load_user_choices()

    record = {
        "choice": choice,
        "ground_truth": ground_truth,
        "is_correct": choice == ground_truth,
    }
    # Create the per-action mapping on first use, then store the answer.
    all_choices.setdefault(action, {})[stem] = record

    with open(output_file, 'w') as out:
        json.dump(all_choices, out, indent=2)
|
51 |
+
|
52 |
+
def _read_sidecar_text(path, default):
    """Return the stripped contents of *path*, or *default* if it is missing."""
    try:
        # The context manager closes the handle; a bare open().read() leaves
        # that to the garbage collector.
        with open(path, 'r') as sidecar:
            return sidecar.read().strip()
    except FileNotFoundError:
        return default


def get_content_at_index(action, index):
    """Load everything needed to display one question.

    Args:
        action: Action name, expected to be a key of ``action_images``.
        index: Zero-based position of the image in that action's list.

    Returns:
        A 6-tuple ``(image_path, image_name, text_prompt, ground_truth,
        enable_prev, enable_next)``:

        * ``image_path`` / ``image_name``: path and file name of the image.
        * ``text_prompt``: contents of the sibling ``<stem>.txt`` file (or a
          placeholder), wrapped in a ``<div>`` that enlarges the font.
        * ``ground_truth``: contents of the sibling ``<stem>_answer.txt``
          file, or a placeholder.
        * ``enable_prev`` / ``enable_next``: whether an image exists before /
          after ``index``.

        For an unknown action or an out-of-range index the tuple is
        ``(None, None, "No more images", "", False, False)``.
    """
    images = action_images.get(action)
    if images is None or not 0 <= index < len(images):
        return None, None, "No more images", "", False, False

    # image
    image_name = images[index]
    image_path = os.path.join(image_root, action, image_name)

    # Sidecar files share the image's stem. Only the final extension is
    # stripped: str.replace(".jpg", ...) would also rewrite any ".jpg" that
    # appears earlier in the path.
    stem = os.path.splitext(image_path)[0]

    # text prompt, with font size styling applied
    text_prompt = _read_sidecar_text(stem + ".txt", "No text prompt available")
    text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"

    # ground truth
    ground_truth = _read_sidecar_text(
        stem + "_answer.txt", "No ground truth available"
    )

    # button states
    enable_prev = index > 0
    enable_next = index < len(images) - 1

    return image_path, image_name, text_prompt, ground_truth, enable_prev, enable_next
|
84 |
+
|
85 |
+
|
86 |
+
# navigate among questions
|
87 |
+
def navigate(action, index, direction):
    """Step to a neighbouring question and build the UI updates for it.

    Args:
        action: Name of the action currently shown.
        index: Index of the question currently shown.
        direction: Offset added to ``index`` (the buttons pass -1 or 1).

    Returns:
        A 7-tuple: image path, styled prompt HTML, ground truth, an update
        that clears the feedback text, updates toggling the Previous and
        Next buttons, and the new index.
    """
    # Look the action up with .get(): an unknown action (such as the ""
    # placeholder used when no actions exist) must not raise KeyError here.
    # get_content_at_index() then reports it as "No more images".
    image_count = len(action_images.get(action, ()))

    # update index, clamped to the valid range
    new_index = max(0, min(index + direction, image_count - 1))

    # retrieve context
    image_path, _, text_prompt, ground_truth, enable_prev, enable_next = (
        get_content_at_index(action, new_index)
    )

    # Apply font size styling to text_prompt. get_content_at_index() already
    # wraps the prompt once; the extra wrapper is kept so the rendered size
    # matches the other places that display the prompt.
    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"

    return (
        image_path,
        styled_text_prompt,
        ground_truth,
        gr.update(value=""),
        gr.update(interactive=enable_prev),
        gr.update(interactive=enable_next),
        new_index,
    )
|
105 |
+
|
106 |
+
|
107 |
+
# Handle user choice submission
|
108 |
+
def submit_choice(action, index, choice, ground_truth):
    """Save the annotator's answer and return the feedback markup.

    Args:
        action: Name of the action currently shown.
        index: Index of the question currently shown.
        choice: Answer the annotator clicked.
        ground_truth: Expected answer for the question.

    Returns:
        An HTML snippet revealing the ground truth, coloured green when the
        choice matches it and red otherwise, or a plain error message when
        ``action`` / ``index`` do not identify a known image.
    """
    images = action_images.get(action)
    if images is None or index < 0 or index >= len(images):
        return "Invalid demo or keyframe index."

    # Persist the answer before reporting back.
    save_user_choice(action, images[index], choice, ground_truth)

    color = "green" if choice == ground_truth else "red"
    return f'<div style="font-size: 1.25em; color:{color}">Ground Truth: {ground_truth}</div>'
|
120 |
+
|
121 |
+
|
122 |
+
|
123 |
+
def change_action(action):
    """Switch to another action and show its first question.

    Args:
        action: Action name selected in the dropdown.

    Returns:
        An 8-tuple in the order of the dropdown listener's outputs: image,
        prompt text, ground-truth state, feedback display value, Previous
        button update, Next button update, current action, current index
        (always 0).
    """
    if action not in action_images:
        return (
            None,
            "No images available",
            # Ground-truth state: there is no answer to compare against.
            # (Previously a prompt placeholder string ended up in this slot.)
            "",
            "",  # feedback display cleared
            gr.update(interactive=False),
            gr.update(interactive=False),
            action,
            0,
        )

    # Get the first image of the new action
    image_path, _, text_prompt, ground_truth, _, has_next = get_content_at_index(action, 0)

    # Apply font size styling to text_prompt. get_content_at_index() already
    # wraps the prompt once; the extra wrapper is kept so the rendered size
    # matches the other places that display the prompt.
    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"

    return (
        image_path,
        styled_text_prompt,
        ground_truth,
        gr.update(value=""),
        gr.update(interactive=False),  # "Previous" is off: we are at the start
        gr.update(interactive=has_next),  # "Next" is on if more images follow
        action,
        0,
    )
|
138 |
+
|
139 |
+
# initialize data
|
140 |
+
# Built once when the module is loaded; the functions in this file read this
# action -> image-name-list mapping as a module-level global.
action_images = load_action_images()
|
141 |
+
|
142 |
+
def initialize_app():
    """Compute the values the interface starts with.

    Returns:
        An 8-tuple ``(image_path, image_name, text_prompt, ground_truth,
        prev_update, next_update, action, index)`` describing the first
        question of the first action. When there are no actions at all, the
        image fields are ``None``, the prompt is ``"No actions available"``,
        both button updates are non-interactive and the action is ``""``.
    """
    if action_images:
        first_action = next(iter(action_images))
        path, name, prompt, answer, _, has_next = get_content_at_index(first_action, 0)

        # "Previous" always starts disabled, whatever the lookup reported.
        prev_update = gr.update(interactive=False)
        next_update = gr.update(interactive=has_next)
        return path, name, prompt, answer, prev_update, next_update, first_action, 0

    return (
        None,
        None,
        "No actions available",
        "",
        gr.update(interactive=False),
        gr.update(interactive=False),
        "",
        0,
    )
|
153 |
+
|
154 |
+
(
    first_image,
    first_image_name,
    first_text_prompt,
    first_ground_truth,
    enable_prev,
    enable_next,
    first_action,
    first_index,
) = initialize_app()


def _nav_handler(step):
    """Build the click callback that moves *step* questions away."""
    return lambda action, index: navigate(action, index, step)


def _choice_handler(letter):
    """Build the click callback that submits *letter* as the answer."""
    return lambda action, index, gt: submit_choice(action, index, letter, gt)


# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# VLM Embodied Benchmark Human Annotation Interface")

    # State holders: current action, current index, current ground truth.
    current_action = gr.State(value=first_action)
    current_index = gr.State(value=first_index)
    current_ground_truth = gr.State(value=first_ground_truth)

    # UI components
    action_dropdown = gr.Dropdown(
        choices=list(action_images),
        value=first_action,
        label="Select Demo to Annotate",
    )
    # An earlier revision used width=500 for the image.
    image = gr.Image(value=first_image, interactive=False, width=1500)
    text_prompt = gr.Markdown(
        value=f"<div style='font-size: 1.25em;'>{first_text_prompt}</div>"
    )
    with gr.Row():  # navigation buttons
        # "Previous" is explicitly disabled at start-up.
        prev_button = gr.Button("Previous", interactive=False)
        next_button = gr.Button("Next", interactive=enable_next["interactive"])
    with gr.Row():  # answer buttons
        a_button = gr.Button("A")
        b_button = gr.Button("B")
        c_button = gr.Button("C")
        d_button = gr.Button("D")
    ground_truth_display = gr.Markdown(value="")

    # Selecting another action reloads everything, including both states.
    action_dropdown.change(
        fn=change_action,
        inputs=[action_dropdown],
        outputs=[
            image,
            text_prompt,
            current_ground_truth,
            ground_truth_display,
            prev_button,
            next_button,
            current_action,
            current_index,
        ],
    )

    # Navigation buttons share their inputs/outputs and differ only in step.
    nav_input = [current_action, current_index]
    nav_output = [
        image,
        text_prompt,
        current_ground_truth,
        ground_truth_display,
        prev_button,
        next_button,
        current_index,
    ]
    for nav_button, step in ((prev_button, -1), (next_button, 1)):
        nav_button.click(fn=_nav_handler(step), inputs=nav_input, outputs=nav_output)

    # Answer buttons differ only in the letter they submit.
    input_param = [current_action, current_index, current_ground_truth]
    output_param = [ground_truth_display]
    for letter, choice_button in (
        ("A", a_button),
        ("B", b_button),
        ("C", c_button),
        ("D", d_button),
    ):
        choice_button.click(
            fn=_choice_handler(letter), inputs=input_param, outputs=output_param
        )

app.launch(share=True)
|