Spaces:
Runtime error
Runtime error
praeclarumjj3
committed on
Commit
•
016e4dd
1
Parent(s):
a97500b
Update app.py
Browse files
app.py
CHANGED
@@ -80,48 +80,51 @@ def flag_last_response(state, model_selector, request: gr.Request):
|
|
80 |
vote_last_response(state, "flag", model_selector, request)
|
81 |
return ("",) + (disable_btn,) * 3
|
82 |
|
83 |
-
def regenerate(state, image_process_mode, seg_process_mode):
|
84 |
state.messages[-1][-1] = None
|
85 |
prev_human_msg = state.messages[-2]
|
86 |
if type(prev_human_msg[1]) in (tuple, list):
|
87 |
-
prev_human_msg[1] = (*prev_human_msg[1][:2], image_process_mode, prev_human_msg[1][3], seg_process_mode,
|
88 |
state.skip_next = False
|
89 |
-
return (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
|
90 |
|
91 |
|
92 |
def clear_history(request: gr.Request):
|
93 |
state = default_conversation.copy()
|
94 |
-
return (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
|
95 |
|
96 |
|
97 |
def add_text(state, text, image, image_process_mode, seg, seg_process_mode, depth, depth_process_mode, request: gr.Request):
|
98 |
logger.info(f"add_text. len: {len(text)}")
|
99 |
if len(text) <= 0 and image is None:
|
100 |
state.skip_next = True
|
101 |
-
return (state, state.to_gradio_chatbot(), "", None, None) + (no_change_btn,) * 5
|
102 |
if args.moderate:
|
103 |
flagged = violates_moderation(text)
|
104 |
if flagged:
|
105 |
state.skip_next = True
|
106 |
-
return (state, state.to_gradio_chatbot(), moderation_msg, None, None) + (
|
107 |
no_change_btn,) * 5
|
108 |
|
109 |
-
text = text[:
|
110 |
if image is not None:
|
111 |
-
text = text[:
|
112 |
if '<image>' not in text:
|
113 |
text = '<image>\n' + text
|
114 |
if seg is not None:
|
115 |
if '<seg>' not in text:
|
116 |
text = '<seg>\n' + text
|
|
|
|
|
|
|
117 |
|
118 |
-
text = (text, image, image_process_mode, seg, seg_process_mode,
|
119 |
if len(state.get_images(return_pil=True)) > 0:
|
120 |
state = default_conversation.copy()
|
121 |
state.append_message(state.roles[0], text)
|
122 |
state.append_message(state.roles[1], None)
|
123 |
state.skip_next = False
|
124 |
-
return (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
|
125 |
|
126 |
|
127 |
def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request: gr.Request):
|
@@ -145,24 +148,6 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
|
|
145 |
# Construct prompt
|
146 |
prompt = state.get_prompt()
|
147 |
|
148 |
-
all_images = state.get_images(return_pil=True)
|
149 |
-
all_image_hash = [hashlib.md5(image.tobytes()).hexdigest() for image in all_images]
|
150 |
-
for image, hash in zip(all_images, all_image_hash):
|
151 |
-
t = datetime.datetime.now()
|
152 |
-
filename = os.path.join(LOGDIR, "serve_images", f"{t.year}-{t.month:02d}-{t.day:02d}", f"{hash}.jpg")
|
153 |
-
if not os.path.isfile(filename):
|
154 |
-
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
155 |
-
image.save(filename)
|
156 |
-
|
157 |
-
all_segs = state.get_segs(return_pil=True)
|
158 |
-
all_seg_hash = [hashlib.md5(seg.tobytes()).hexdigest() for seg in all_segs]
|
159 |
-
for seg, hash in zip(all_segs, all_seg_hash):
|
160 |
-
t = datetime.datetime.now()
|
161 |
-
filename = os.path.join(LOGDIR, "serve_segs", f"{t.year}-{t.month:02d}-{t.day:02d}", f"{hash}.jpg")
|
162 |
-
if not os.path.isfile(filename):
|
163 |
-
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
164 |
-
seg.save(filename)
|
165 |
-
|
166 |
# Make requests
|
167 |
pload = {
|
168 |
"model": model_name,
|
@@ -171,13 +156,15 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
|
|
171 |
"top_p": float(top_p),
|
172 |
"max_new_tokens": min(int(max_new_tokens), 1536),
|
173 |
"stop": state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2,
|
174 |
-
"images": f'List of {len(state.get_images())}
|
175 |
-
"segs": f'List of {len(state.get_segs())}
|
|
|
176 |
}
|
177 |
logger.info(f"==== request ====\n{pload}")
|
178 |
|
179 |
pload['images'] = state.get_images()
|
180 |
pload['segs'] = state.get_segs()
|
|
|
181 |
|
182 |
state.messages[-1][-1] = "▌"
|
183 |
yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
|
@@ -207,24 +194,8 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
|
|
207 |
|
208 |
state.messages[-1][-1] = state.messages[-1][-1][:-1]
|
209 |
yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
|
210 |
-
|
211 |
-
finish_tstamp = time.time()
|
212 |
logger.info(f"{output}")
|
213 |
|
214 |
-
with open(get_conv_log_filename(), "a") as fout:
|
215 |
-
data = {
|
216 |
-
"tstamp": round(finish_tstamp, 4),
|
217 |
-
"type": "chat",
|
218 |
-
"model": model_name,
|
219 |
-
"start": round(start_tstamp, 4),
|
220 |
-
"finish": round(start_tstamp, 4),
|
221 |
-
"state": state.dict(),
|
222 |
-
"images": all_image_hash,
|
223 |
-
"segs": all_seg_hash,
|
224 |
-
"ip": request.client.host,
|
225 |
-
}
|
226 |
-
fout.write(json.dumps(data) + "\n")
|
227 |
-
|
228 |
|
229 |
title = "<h1 style='margin-bottom: -10px; text-align: center'>VCoder: Versatile Vision Encoders for Multimodal Large Language Models</h1>"
|
230 |
# style='
|
@@ -284,6 +255,12 @@ def build_demo(embed_mode):
|
|
284 |
["Crop", "Resize", "Pad", "Default"],
|
285 |
value="Default",
|
286 |
label="Preprocess for non-square Seg Map", visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
|
288 |
with gr.Accordion("Parameters", open=False) as parameter_row:
|
289 |
temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, interactive=True, label="Temperature",)
|
@@ -307,13 +284,8 @@ def build_demo(embed_mode):
|
|
307 |
|
308 |
cur_dir = os.path.dirname(os.path.abspath(__file__))
|
309 |
gr.Examples(examples=[
|
310 |
-
[f"{cur_dir}/examples/
|
311 |
-
|
312 |
-
[f"{cur_dir}/examples/friends.jpg", f"{cur_dir}/examples/friends_pan.png", "Can you count the number of people in the image?", "0.8", "0.9"],
|
313 |
-
[f"{cur_dir}/examples/friends.jpg", f"{cur_dir}/examples/friends_pan.png", "What is happening in the image?", "0.8", "0.9"],
|
314 |
-
[f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_pan.png", "What objects can be seen in the image?", "0.5", "0.5"],
|
315 |
-
[f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_ins.png", "What objects can be seen in the image?", "0.5", "0.5"],
|
316 |
-
], inputs=[imagebox, segbox, textbox, temperature, top_p])
|
317 |
|
318 |
if not embed_mode:
|
319 |
gr.Markdown(tos_markdown)
|
@@ -327,16 +299,16 @@ def build_demo(embed_mode):
|
|
327 |
[state, model_selector], [textbox, upvote_btn, downvote_btn, flag_btn])
|
328 |
flag_btn.click(flag_last_response,
|
329 |
[state, model_selector], [textbox, upvote_btn, downvote_btn, flag_btn])
|
330 |
-
regenerate_btn.click(regenerate, [state, image_process_mode, seg_process_mode],
|
331 |
-
[state, chatbot, textbox, imagebox, segbox] + btn_list).then(
|
332 |
http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
|
333 |
[state, chatbot] + btn_list)
|
334 |
-
clear_btn.click(clear_history, None, [state, chatbot, textbox, imagebox, segbox] + btn_list)
|
335 |
|
336 |
-
textbox.submit(add_text, [state, textbox, imagebox, image_process_mode, segbox, seg_process_mode], [state, chatbot, textbox, imagebox, segbox] + btn_list
|
337 |
).then(http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
|
338 |
[state, chatbot] + btn_list)
|
339 |
-
submit_btn.click(add_text, [state, textbox, imagebox, image_process_mode, segbox, seg_process_mode], [state, chatbot, textbox, imagebox, segbox] + btn_list
|
340 |
).then(http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
|
341 |
[state, chatbot] + btn_list)
|
342 |
|
|
|
80 |
vote_last_response(state, "flag", model_selector, request)
|
81 |
return ("",) + (disable_btn,) * 3
|
82 |
|
83 |
+
def regenerate(state, image_process_mode, seg_process_mode, depth_process_mode):
|
84 |
state.messages[-1][-1] = None
|
85 |
prev_human_msg = state.messages[-2]
|
86 |
if type(prev_human_msg[1]) in (tuple, list):
|
87 |
+
prev_human_msg[1] = (*prev_human_msg[1][:2], image_process_mode, prev_human_msg[1][3], seg_process_mode, prev_human_msg[1][5], depth_process_mode)
|
88 |
state.skip_next = False
|
89 |
+
return (state, state.to_gradio_chatbot(), "", None, None, None, None) + (disable_btn,) * 5
|
90 |
|
91 |
|
92 |
def clear_history(request: gr.Request):
|
93 |
state = default_conversation.copy()
|
94 |
+
return (state, state.to_gradio_chatbot(), "", None, None, None, None) + (disable_btn,) * 5
|
95 |
|
96 |
|
97 |
def add_text(state, text, image, image_process_mode, seg, seg_process_mode, depth, depth_process_mode, request: gr.Request):
|
98 |
logger.info(f"add_text. len: {len(text)}")
|
99 |
if len(text) <= 0 and image is None:
|
100 |
state.skip_next = True
|
101 |
+
return (state, state.to_gradio_chatbot(), "", None, None, None, None) + (no_change_btn,) * 5
|
102 |
if args.moderate:
|
103 |
flagged = violates_moderation(text)
|
104 |
if flagged:
|
105 |
state.skip_next = True
|
106 |
+
return (state, state.to_gradio_chatbot(), moderation_msg, None, None, None, None) + (
|
107 |
no_change_btn,) * 5
|
108 |
|
109 |
+
text = text[:1200] # Hard cut-off
|
110 |
if image is not None:
|
111 |
+
text = text[:864] # Hard cut-off for images
|
112 |
if '<image>' not in text:
|
113 |
text = '<image>\n' + text
|
114 |
if seg is not None:
|
115 |
if '<seg>' not in text:
|
116 |
text = '<seg>\n' + text
|
117 |
+
if depth is not None:
|
118 |
+
if '<depth>' not in text:
|
119 |
+
text = '<depth>\n' + text
|
120 |
|
121 |
+
text = (text, image, image_process_mode, seg, seg_process_mode, depth, depth_process_mode)
|
122 |
if len(state.get_images(return_pil=True)) > 0:
|
123 |
state = default_conversation.copy()
|
124 |
state.append_message(state.roles[0], text)
|
125 |
state.append_message(state.roles[1], None)
|
126 |
state.skip_next = False
|
127 |
+
return (state, state.to_gradio_chatbot(), "", None, None, None, None) + (disable_btn,) * 5
|
128 |
|
129 |
|
130 |
def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request: gr.Request):
|
|
|
148 |
# Construct prompt
|
149 |
prompt = state.get_prompt()
|
150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
# Make requests
|
152 |
pload = {
|
153 |
"model": model_name,
|
|
|
156 |
"top_p": float(top_p),
|
157 |
"max_new_tokens": min(int(max_new_tokens), 1536),
|
158 |
"stop": state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2,
|
159 |
+
"images": f'List of {len(state.get_images())}',
|
160 |
+
"segs": f'List of {len(state.get_segs())}',
|
161 |
+
"depths": f'List of {len(state.get_depths())}',
|
162 |
}
|
163 |
logger.info(f"==== request ====\n{pload}")
|
164 |
|
165 |
pload['images'] = state.get_images()
|
166 |
pload['segs'] = state.get_segs()
|
167 |
+
pload['depths'] = state.get_depths()
|
168 |
|
169 |
state.messages[-1][-1] = "▌"
|
170 |
yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
|
|
|
194 |
|
195 |
state.messages[-1][-1] = state.messages[-1][-1][:-1]
|
196 |
yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
|
|
|
|
|
197 |
logger.info(f"{output}")
|
198 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
title = "<h1 style='margin-bottom: -10px; text-align: center'>VCoder: Versatile Vision Encoders for Multimodal Large Language Models</h1>"
|
201 |
# style='
|
|
|
255 |
["Crop", "Resize", "Pad", "Default"],
|
256 |
value="Default",
|
257 |
label="Preprocess for non-square Seg Map", visible=False)
|
258 |
+
|
259 |
+
depthbox = gr.Image(type="pil", label="Depth Map")
|
260 |
+
depth_process_mode = gr.Radio(
|
261 |
+
["Crop", "Resize", "Pad", "Default"],
|
262 |
+
value="Default",
|
263 |
+
label="Preprocess for non-square Depth Map", visible=False)
|
264 |
|
265 |
with gr.Accordion("Parameters", open=False) as parameter_row:
|
266 |
temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, interactive=True, label="Temperature",)
|
|
|
284 |
|
285 |
cur_dir = os.path.dirname(os.path.abspath(__file__))
|
286 |
gr.Examples(examples=[
|
287 |
+
[f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_pan.png", f"{cur_dir}/examples/suits_depth.jpeg", "Can you describe the depth order of the objects in this image, from closest to farthest?", "0.5", "0.5"],
|
288 |
+
], inputs=[imagebox, segbox, depthbox, textbox, temperature, top_p])
|
|
|
|
|
|
|
|
|
|
|
289 |
|
290 |
if not embed_mode:
|
291 |
gr.Markdown(tos_markdown)
|
|
|
299 |
[state, model_selector], [textbox, upvote_btn, downvote_btn, flag_btn])
|
300 |
flag_btn.click(flag_last_response,
|
301 |
[state, model_selector], [textbox, upvote_btn, downvote_btn, flag_btn])
|
302 |
+
regenerate_btn.click(regenerate, [state, image_process_mode, seg_process_mode, depth_process_mode],
|
303 |
+
[state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list).then(
|
304 |
http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
|
305 |
[state, chatbot] + btn_list)
|
306 |
+
clear_btn.click(clear_history, None, [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list)
|
307 |
|
308 |
+
textbox.submit(add_text, [state, textbox, imagebox, image_process_mode, segbox, seg_process_mode, depthbox, depth_process_mode], [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list
|
309 |
).then(http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
|
310 |
[state, chatbot] + btn_list)
|
311 |
+
submit_btn.click(add_text, [state, textbox, imagebox, image_process_mode, segbox, seg_process_mode, depthbox, depth_process_mode], [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list
|
312 |
).then(http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
|
313 |
[state, chatbot] + btn_list)
|
314 |
|