bryanzhou008 committed on
Commit
9fa9058
·
verified ·
1 Parent(s): 68080e2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +216 -0
app.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import os
4
+
5
# Dataset layout: `image_root` is the root directory of the image files, with
# each action stored in its own sub-directory.
# Input directory.
image_root = "forward_dynamics_qa_pairs_v5_w_newline"
# Output file that accumulates the annotator's choices as JSON.
output_file = "v5_forward_dynamics_user_choices.json"

# Alternative (inverse dynamics) configuration, kept for reference:
# image_root = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/inverse_dynamics_qa_pairs_v5_w_newline"
# output_file = "/Users/bryan/Desktop/wkdir/VLM/src/human-annotation-interface/v5_inverse_dynamics_user_choices.json"


# On first run, seed the output file with an empty JSON object so that later
# reads find a parseable document.
if not os.path.exists(output_file):
    with open(output_file, 'w') as f:
        f.write(json.dumps({}))
21
+
22
def load_action_images():
    """Index the ``.jpg`` files available for every action.

    Scans each sub-directory of the module-level ``image_root`` and records
    the names of the files in it that end with ``.jpg``.

    Returns:
        dict: Maps each action (sub-directory) name to the list of its image
        file names in ascending order. Actions are inserted in ascending name
        order. The dict is empty when ``image_root`` does not exist or is not
        a directory.
    """
    try:
        # Sorted so that the action order (and therefore the first action
        # shown) does not depend on the arbitrary order os.listdir returns.
        entries = sorted(os.listdir(image_root))
    except (FileNotFoundError, NotADirectoryError):
        # A missing dataset folder yields an empty index rather than an
        # exception while the module is being loaded.
        return {}

    action_images = {}
    for action in entries:
        action_dir = os.path.join(image_root, action)
        if os.path.isdir(action_dir):
            action_images[action] = sorted(
                name for name in os.listdir(action_dir) if name.endswith('.jpg')
            )
    return action_images
31
+
32
def load_user_choices():
    """Read back everything recorded so far.

    Returns:
        The object parsed from the JSON document stored at the module-level
        ``output_file`` path.
    """
    with open(output_file, 'r') as results:
        recorded = json.load(results)
    return recorded
35
+
36
def save_user_choice(action, image_name, choice, ground_truth):
    """Record one answer in the results file.

    The stored document is read with ``load_user_choices()``, the entry for
    this action/image pair is added or overwritten, and the whole document is
    written back to ``output_file``.

    Args:
        action: Name of the action the image belongs to (top-level key).
        image_name: Image file name; its extension is dropped to form the
            second-level key.
        choice: The option the annotator picked.
        ground_truth: The expected answer for this image.
    """
    image_key = os.path.splitext(image_name)[0]  # drop the ".jpg" extension
    user_choices = load_user_choices()
    user_choices.setdefault(action, {})[image_key] = {
        "choice": choice,
        "ground_truth": ground_truth,
        "is_correct": choice == ground_truth,
    }

    # Write the new document next to the real file and swap it in with
    # os.replace. Opening output_file itself in 'w' mode truncates it first,
    # so an interruption in the middle of the dump would wipe out every answer
    # recorded earlier.
    tmp_path = f"{output_file}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump(user_choices, f, indent=2)
    os.replace(tmp_path, output_file)
51
+
52
def _read_text_or_default(path, default):
    """Return the stripped contents of ``path``, or ``default`` if it is missing."""
    try:
        # The context manager closes the handle as soon as the text is read.
        with open(path, 'r') as f:
            return f.read().strip()
    except FileNotFoundError:
        return default


def get_content_at_index(action, index):
    """Collect everything needed to display one question.

    Args:
        action: Key into the module-level ``action_images`` index.
        index: Position of the image inside that action's image list.

    Returns:
        tuple: ``(image_path, image_name, text_prompt, ground_truth,
        enable_prev, enable_next)``.

        * ``text_prompt`` is read from the file that shares the image's path
          with the extension swapped for ``.txt``; ``ground_truth`` from the
          one ending in ``_answer.txt``. Each falls back to a placeholder
          message when its file does not exist.
        * ``text_prompt`` is plain text. ``navigate``, ``change_action`` and
          the ``gr.Markdown`` component all wrap it in the 1.25em ``<div>``
          themselves, so wrapping it here as well would nest two relative
          font sizes and enlarge the text beyond the intended 1.25em.
        * ``enable_prev`` / ``enable_next`` tell whether an image exists
          before / after ``index``.

        When ``action`` is unknown or ``index`` is out of range the result is
        ``(None, None, "No more images", "", False, False)``.
    """
    images = action_images.get(action)
    if images is None or not 0 <= index < len(images):
        return None, None, "No more images", "", False, False

    image_name = images[index]
    image_path = os.path.join(image_root, action, image_name)

    # Strip only the trailing extension. str.replace(".jpg", ...) would also
    # rewrite a ".jpg" that happens to appear in a directory name.
    stem = os.path.splitext(image_path)[0]
    text_prompt = _read_text_or_default(stem + ".txt", "No text prompt available")
    ground_truth = _read_text_or_default(stem + "_answer.txt", "No ground truth available")

    # button states
    enable_prev = index > 0
    enable_next = index < len(images) - 1

    return image_path, image_name, text_prompt, ground_truth, enable_prev, enable_next
84
+
85
+
86
+ # navigate among questions
87
def navigate(action, index, direction):
    """Step ``direction`` positions away from ``index`` within ``action``.

    The target position is clamped to the valid range of the action's image
    list before its content is fetched with ``get_content_at_index``.

    Returns:
        tuple: ``(image_path, styled_text_prompt, ground_truth,
        cleared_feedback_update, prev_button_update, next_button_update,
        new_index)``.
    """
    last_index = len(action_images[action]) - 1

    # Clamp: cap at the last image first, then floor at zero.
    new_index = index + direction
    if new_index > last_index:
        new_index = last_index
    if new_index < 0:
        new_index = 0

    content = get_content_at_index(action, new_index)
    image_path, text_prompt, ground_truth = content[0], content[2], content[3]
    can_go_back, can_go_forward = content[4], content[5]

    # Enlarge the prompt text for display.
    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"

    cleared_feedback = gr.update(value="")
    prev_state = gr.update(interactive=can_go_back)
    next_state = gr.update(interactive=can_go_forward)
    return (
        image_path,
        styled_text_prompt,
        ground_truth,
        cleared_feedback,
        prev_state,
        next_state,
        new_index,
    )
105
+
106
+
107
+ # Handle user choice submission
108
def submit_choice(action, index, choice, ground_truth):
    """Store the annotator's answer and build the feedback markup.

    Returns:
        str: An error message when ``action`` / ``index`` do not identify an
        image; otherwise an HTML snippet revealing the ground truth, coloured
        green when ``choice`` matches it and red when it does not.
    """
    is_valid = action in action_images and 0 <= index < len(action_images[action])
    if not is_valid:
        return "Invalid demo or keyframe index."

    # Persist the answer before reporting back.
    save_user_choice(action, action_images[action][index], choice, ground_truth)

    color = "green" if choice == ground_truth else "red"
    return f'<div style="font-size: 1.25em; color:{color}">Ground Truth: {ground_truth}</div>'
120
+
121
+
122
+
123
def change_action(action):
    """Switch the interface to the first image of ``action``.

    The returned values are consumed positionally by the dropdown's change
    event, whose outputs are, in order: image, text prompt, ground-truth
    state, ground-truth display, "Previous" button, "Next" button, current
    action state, current index state.

    Returns:
        tuple: Eight values in the order listed above. The index is always
        reset to 0 and the "Previous" button is always disabled.
    """
    if action not in action_images:
        # The third slot feeds the ground-truth state, so it carries an empty
        # answer (as get_content_at_index does when nothing is available)
        # rather than a prompt placeholder message.
        return (
            None,
            "No images available",
            "",
            "",
            gr.update(interactive=False),
            gr.update(interactive=False),
            action,
            0,
        )

    # Get the first image of the new action
    image_path, _, text_prompt, ground_truth, _, has_next = get_content_at_index(action, 0)

    # Apply font size styling to text_prompt
    styled_text_prompt = f"<div style='font-size: 1.25em;'>{text_prompt}</div>"

    # Reset states
    enable_prev = gr.update(interactive=False)  # at the start, nothing to go back to
    enable_next = gr.update(interactive=has_next)  # enabled only if more images follow

    return image_path, styled_text_prompt, ground_truth, gr.update(value=""), enable_prev, enable_next, action, 0
138
+
139
# Build the action -> image-names index once, when the module is loaded.
action_images = load_action_images()


def initialize_app():
    """Compute the values the interface starts with.

    Returns:
        tuple: ``(image_path, image_name, text_prompt, ground_truth,
        prev_button_update, next_button_update, first_action, index)`` for
        the first image of the first action in ``action_images``. When the
        index is empty, placeholder values are returned with both button
        updates disabled and an empty action name.
    """
    if not action_images:
        return None, None, "No actions available", "", gr.update(interactive=False), gr.update(interactive=False), "", 0

    first_action = next(iter(action_images))
    content = get_content_at_index(first_action, 0)
    image_path, image_name, text_prompt, ground_truth, _, has_next = content

    # "Previous" is always disabled at start-up, whatever the lookup reported.
    prev_state = gr.update(interactive=False)
    next_state = gr.update(interactive=has_next)

    return (
        image_path,
        image_name,
        text_prompt,
        ground_truth,
        prev_state,
        next_state,
        first_action,
        0,
    )
153
+
154
# Values for the very first render.
(
    first_image,
    first_image_name,
    first_text_prompt,
    first_ground_truth,
    enable_prev,
    enable_next,
    first_action,
    first_index,
) = initialize_app()


def _nav_handler(step):
    """Build a click callback that moves ``step`` images from the current one."""
    return lambda action, index: navigate(action, index, step)


def _choice_handler(letter):
    """Build a click callback that submits ``letter`` as the annotator's answer."""
    return lambda action, index, gt: submit_choice(action, index, letter, gt)


# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# VLM Embodied Benchmark Human Annotation Interface")

    # Session state: selected action, position within it, expected answer.
    current_action = gr.State(value=first_action)
    current_index = gr.State(value=first_index)
    current_ground_truth = gr.State(value=first_ground_truth)

    # Visible components, created in display order.
    action_dropdown = gr.Dropdown(
        choices=list(action_images),
        value=first_action,
        label="Select Demo to Annotate",
    )
    image = gr.Image(value=first_image, interactive=False, width=1500)
    text_prompt = gr.Markdown(value=f"<div style='font-size: 1.25em;'>{first_text_prompt}</div>")
    with gr.Row():  # navigation
        prev_button = gr.Button("Previous", interactive=False)  # nothing before the first image
        next_button = gr.Button("Next", interactive=enable_next["interactive"])
    with gr.Row():  # answer options
        a_button = gr.Button("A")
        b_button = gr.Button("B")
        c_button = gr.Button("C")
        d_button = gr.Button("D")
    ground_truth_display = gr.Markdown(value="")

    # Selecting another action reloads everything from its first image.
    action_dropdown.change(
        fn=change_action,
        inputs=[action_dropdown],
        outputs=[
            image,
            text_prompt,
            current_ground_truth,
            ground_truth_display,
            prev_button,
            next_button,
            current_action,
            current_index,
        ],
    )

    # Previous / Next share the same inputs and outputs and differ only in step.
    nav_input = [current_action, current_index]
    nav_output = [
        image,
        text_prompt,
        current_ground_truth,
        ground_truth_display,
        prev_button,
        next_button,
        current_index,
    ]
    for nav_button, step in ((prev_button, -1), (next_button, 1)):
        nav_button.click(fn=_nav_handler(step), inputs=nav_input, outputs=nav_output)

    # Each answer button submits its own letter and shows the feedback.
    input_param = [current_action, current_index, current_ground_truth]
    output_param = [ground_truth_display]
    for letter, choice_button in (("A", a_button), ("B", b_button), ("C", c_button), ("D", d_button)):
        choice_button.click(fn=_choice_handler(letter), inputs=input_param, outputs=output_param)

app.launch(share=True)