BenkHel committed on
Commit
6d02f81
·
verified ·
1 Parent(s): 6f484e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +304 -10
app.py CHANGED
@@ -1,19 +1,313 @@
1
- import gradio as gr
 
 
 
 
2
  import spaces
3
- from cumo.model.builder import load_pretrained_model
4
- from cumo.mm_utils import process_images, tokenizer_image_token
5
- from cumo.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
6
- import torch
 
 
 
7
  from PIL import Image
8
 
9
- model_path = "BenkHel/CumoThesis"
10
- model_base = None
11
- model_name = "CumoThesis" # oder "BenkHel/CumoThesis"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  load_8bit = False
13
  load_4bit = False
14
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- tokenizer, model, image_processor, context_len = load_pretrained_model(
 
 
 
17
  model_path, model_base, model_name, load_8bit, load_4bit, device, use_flash_attn=False
18
  )
19
 
 
1
+ import sys
2
+ import os
3
+ import argparse
4
+ import time
5
+ import subprocess
6
  import spaces
7
+ import cumo.serve.gradio_web_server as gws
8
+
9
+ import datetime
10
+ import json
11
+
12
+ import gradio as gr
13
+ import requests
14
  from PIL import Image
15
 
16
+ from cumo.conversation import (default_conversation, conv_templates, SeparatorStyle)
17
+ from cumo.constants import LOGDIR
18
+ from cumo.utils import (build_logger, server_error_msg, violates_moderation, moderation_msg)
19
+ import hashlib
20
+
21
+ import torch
22
+ import io
23
+ from cumo.constants import WORKER_HEART_BEAT_INTERVAL
24
+ from cumo.utils import (build_logger, server_error_msg,
25
+ pretty_print_semaphore)
26
+ from cumo.model.builder import load_pretrained_model
27
+ from cumo.mm_utils import process_images, load_image_from_base64, tokenizer_image_token
28
+ from cumo.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
29
+ from transformers import TextIteratorStreamer
30
+ from threading import Thread
31
+
32
+ # Execute the pip install command with additional options
33
+ #subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'flash-attn', '--no-build-isolation', '-U']
34
+
35
# Constant header dict; nothing in the visible part of this file reads it.
headers = {"User-Agent": "CuMo"}

# Shared button objects. The callbacks below return these inside their output
# tuples to switch the interactivity of the action buttons.
no_change_btn = gr.Button()
enable_btn = gr.Button(interactive=True)
disable_btn = gr.Button(interactive=False)

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint selection and the conversation template used for new chats.
model_path = 'BenkHel/CumoThesis'
model_base = 'mistralai/Mistral-7B-Instruct-v0.2'
model_name = 'CumoThesis'
conv_mode = 'mistral_instruct_system'
load_8bit = False
load_4bit = False

# Load tokenizer, model and image processor once at import time; every
# request handled by `generate` reuses these module-level objects.
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path,
    model_base,
    model_name,
    load_8bit,
    load_4bit,
    device=device,
    use_flash_attn=False,
)
model.config.training = False
50
+
51
def upvote_last_response(state):
    """Handle a click on the Upvote button.

    ``state`` is supplied by the listener but is not read here.

    Returns:
        A 4-tuple: an empty string followed by ``disable_btn`` three times.
    """
    return ("", disable_btn, disable_btn, disable_btn)
53
+
54
+
55
def downvote_last_response(state):
    """Handle a click on the Downvote button.

    ``state`` is supplied by the listener but is not read here.

    Returns:
        A 4-tuple: an empty string followed by ``disable_btn`` three times.
    """
    return ("", disable_btn, disable_btn, disable_btn)
57
+
58
+
59
def flag_last_response(state):
    """Handle a click on the Flag button.

    ``state`` is supplied by the listener but is not read here.

    Returns:
        A 4-tuple: an empty string followed by ``disable_btn`` three times.
    """
    return ("", disable_btn, disable_btn, disable_btn)
61
+
62
def clear_history():
    """Reset the chat to a fresh, empty conversation.

    The new conversation is copied from ``conv_templates[conv_mode]``, the same
    template ``add_text`` uses when it has to create a conversation, so the
    prompt format stays the same before and after pressing Clear.

    Returns:
        A 9-tuple: the new state, its chatbot rendering, an empty string,
        ``None``, and ``disable_btn`` five times.
    """
    state = conv_templates[conv_mode].copy()
    return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
65
+
66
def add_text(state, imagebox, textbox, image_process_mode):
    """Append the user's turn (text plus optional image) to the conversation.

    Args:
        state: Current conversation, or ``None`` when no conversation exists
            yet; in that case one is copied from ``conv_templates[conv_mode]``.
        imagebox: Value passed to ``Image.open`` (the image component is
            declared with ``type="filepath"``), or ``None`` when no image is
            attached.
        textbox: The user's message text.
        image_process_mode: Stored alongside the image in the message tuple.

    Yields:
        A single 9-tuple: state, chatbot rendering, an empty string, ``None``,
        then three ``disable_btn`` and two ``enable_btn`` values.
    """
    if state is None:
        state = conv_templates[conv_mode].copy()

    if imagebox is not None:
        # `convert` returns an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(imagebox) as raw_image:
            image = raw_image.convert('RGB')
        # The image token is prepended to the text and the image travels with
        # it as a (text, image, mode) tuple.
        textbox = (DEFAULT_IMAGE_TOKEN + '\n' + textbox, image, image_process_mode)

    state.append_message(state.roles[0], textbox)
    # Empty assistant slot that `generate` fills in while streaming.
    state.append_message(state.roles[1], None)

    yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
81
+
82
def delete_text(state, image_process_mode):
    """Blank the last reply so it can be generated again.

    The final message's content is set to ``None``. If the message before it
    carries a tuple/list payload, its third element is replaced with the
    current ``image_process_mode``.

    Yields:
        A single 9-tuple: state, chatbot rendering, an empty string, ``None``,
        then three ``disable_btn`` and two ``enable_btn`` values.
    """
    state.messages[-1][-1] = None
    prev_human_msg = state.messages[-2]
    if isinstance(prev_human_msg[1], (tuple, list)):
        prev_human_msg[1] = (*prev_human_msg[1][:2], image_process_mode)
    yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
88
+
89
def regenerate(state, image_process_mode):
    """Blank the last reply and clear ``state.skip_next``.

    The final message's content is set to ``None``. If the message before it
    carries a tuple/list payload, its third element is replaced with the
    current ``image_process_mode``.

    Returns:
        A 9-tuple: state, chatbot rendering, an empty string, ``None``, and
        ``disable_btn`` five times.
    """
    state.messages[-1][-1] = None
    prev_human_msg = state.messages[-2]
    if isinstance(prev_human_msg[1], (tuple, list)):
        prev_human_msg[1] = (*prev_human_msg[1][:2], image_process_mode)
    state.skip_next = False
    return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
96
+
97
@spaces.GPU
def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens):
    """Stream the model's reply for the conversation held in ``state``.

    Args:
        state: Conversation whose last message is the empty assistant slot to
            fill in.
        imagebox, textbox, image_process_mode: Passed by the listeners but not
            read here; the prompt and images are taken from ``state``.
        temperature: Sampling temperature; sampling is used only when it is
            greater than 0.001.
        top_p: Nucleus-sampling parameter forwarded to ``model.generate``.
        max_output_tokens: Requested upper bound on generated tokens.

    Yields:
        9-tuples of (state, chatbot rendering, "", None, five button values),
        one per streamed text chunk and a final one that enables all buttons.

    Raises:
        ValueError: If the number of images differs from the number of image
            tokens in the prompt.
    """
    prompt = state.get_prompt()
    images = state.get_images(return_pil=True)

    num_image_tokens = 0
    image_args = {}

    if images is not None and len(images) > 0:
        if len(images) != prompt.count(DEFAULT_IMAGE_TOKEN):
            raise ValueError("Number of images does not match number of <image> tokens in prompt")

        # Sizes must be read before preprocessing replaces the PIL images.
        image_sizes = [image.size for image in images]
        images = process_images(images, image_processor, model.config)

        if isinstance(images, list):
            images = [image.to(model.device, dtype=torch.float16) for image in images]
        else:
            images = images.to(model.device, dtype=torch.float16)

        replace_token = DEFAULT_IMAGE_TOKEN
        if getattr(model.config, 'mm_use_im_start_end', False):
            replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN
        prompt = prompt.replace(DEFAULT_IMAGE_TOKEN, replace_token)

        # Each image placeholder is budgeted as `num_patches` context tokens.
        num_image_tokens = prompt.count(replace_token) * model.get_vision_tower().num_patches
        image_args = {"images": images, "image_sizes": image_sizes}

    max_context_length = getattr(model.config, 'max_position_embeddings', 2048)
    do_sample = temperature > 0.001
    stop_str = state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2

    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)

    # Honour the "Max output tokens" slider (at least one token), capped by the
    # room left in the context window.
    requested_tokens = max(int(max_output_tokens), 1)
    max_new_tokens = min(requested_tokens, max_context_length - input_ids.shape[-1] - num_image_tokens)

    if max_new_tokens < 1:
        # Report the problem in the chat itself, using the same output tuple
        # shape as every other yield of this function.
        state.messages[-1][-1] = "Exceeds max token length. Please start a new conversation, thanks."
        yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
        return

    # Generation runs in a background thread; text is consumed via the streamer.
    thread = Thread(target=model.generate, kwargs=dict(
        inputs=input_ids,
        do_sample=do_sample,
        temperature=temperature,
        top_p=top_p,
        max_new_tokens=max_new_tokens,
        streamer=streamer,
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
        **image_args
    ))
    thread.start()

    generated_text = ''
    for new_text in streamer:
        generated_text += new_text
        # Strip a trailing stop string, if the template defines one.
        if stop_str and generated_text.endswith(stop_str):
            generated_text = generated_text[:-len(stop_str)]
        state.messages[-1][-1] = generated_text
        yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)

    # The streamer is exhausted, so the worker is finishing; wait for it.
    thread.join()

    yield (state, state.to_gradio_chatbot(), "", None) + (enable_btn,) * 5

    torch.cuda.empty_cache()
171
+
172
# Markdown rendered at the top of the page: title plus project links.
title_markdown = ("""
# CuMo: Scaling Multimodal LLM with Co-Upcycled Mixture-of-Experts
[[Project Page](https://chrisjuniorli.github.io/project/CuMo/)] [[Code](https://github.com/SHI-Labs/CuMo)] [[Model](https://huggingface.co/shi-labs/CuMo-mistral-7b)] | 📚 [[Arxiv](https://arxiv.org/pdf/2405.05949)]
""")
176
+
177
# Terms-of-use text rendered below the chat area.
tos_markdown = """
### Terms of use
By using this service, users are required to agree to the following terms:
The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
"""
184
+
185
+
186
# License note rendered below the terms of use. The dangling "subject to the"
# fragment was removed because the document it referred to is not named here.
learn_more_markdown = ("""
### License
The service is a research preview intended for non-commercial use only. Please contact us if you find any potential violation.
""")
190
+
191
# Extra CSS passed to gr.Blocks: sets a minimum width on buttons inside the
# element with id "buttons".
block_css = """
#buttons button {
    min-width: min(120px,100%);
}
"""
196
+
197
# The prompt box is created before the Blocks context so that gr.Examples can
# reference it; it is placed into the layout later via `textbox.render()`.
textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)

# NOTE(review): nesting below is reconstructed from a diff view that lost its
# indentation — confirm against the real file.
with gr.Blocks(title="CuMo", theme=gr.themes.Default(), css=block_css) as demo:
    state = gr.State()

    gr.Markdown(title_markdown)

    with gr.Row():
        # Left column: image input, examples and sampling parameters.
        with gr.Column(scale=3):
            imagebox = gr.Image(label="Input Image", type="filepath")
            image_process_mode = gr.Radio(
                ["Crop", "Resize", "Pad", "Default"],
                value="Default",
                label="Preprocess for non-square image",
                visible=False,
            )

            #cur_dir = os.path.dirname(os.path.abspath(__file__))
            cur_dir = './cumo/serve'
            example_rows = [
                [f"{cur_dir}/examples/aveger.jpg", "Can you introduce this movie based on the poster?"],
                [f"{cur_dir}/examples/fridge.webp", "Can you describe what groceries are presented in this fridge?"],
                [f"{cur_dir}/examples/su7_4.jpg", "What car is it in this image?"],
                [f"{cur_dir}/examples/nvidia.jpeg", "Can you tell me what happened in this image?"],
                [f"{cur_dir}/examples/animal.webp", "What animals are in this image?"],
                [f"{cur_dir}/examples/disney.jpeg", "How many characters in this image?"],
                [f"{cur_dir}/examples/reka_6.jpeg", "What colour is my hat (im sitting on the bear)?"],
            ]
            gr.Examples(examples=example_rows, inputs=[imagebox, textbox], cache_examples=False)

            with gr.Accordion("Parameters", open=False) as parameter_row:
                temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature")
                top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P")
                max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens")

        # Right column: chat window, prompt row and action buttons.
        with gr.Column(scale=8):
            chatbot = gr.Chatbot(
                elem_id="chatbot",
                label="CuMo Chatbot",
                height=650,
                layout="panel",
            )
            with gr.Row():
                with gr.Column(scale=8):
                    textbox.render()
                with gr.Column(scale=1, min_width=50):
                    submit_btn = gr.Button(value="Send", variant="primary")
            with gr.Row(elem_id="buttons") as button_row:
                upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
                downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
                flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
                #stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=False)
                regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
                clear_btn = gr.Button(value="🗑️ Clear", interactive=False)

    gr.Markdown(tos_markdown)
    gr.Markdown(learn_more_markdown)
    url_params = gr.JSON(visible=False)

    # Register listeners
    btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]

    # Component lists shared by several listeners.
    vote_outputs = [textbox, upvote_btn, downvote_btn, flag_btn]
    chat_outputs = [state, chatbot, textbox, imagebox] + btn_list
    generate_inputs = [state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens]

    # Feedback buttons: each handler receives the state and updates the
    # textbox plus the three feedback buttons.
    upvote_btn.click(upvote_last_response, [state], vote_outputs)
    downvote_btn.click(downvote_last_response, [state], vote_outputs)
    flag_btn.click(flag_last_response, [state], vote_outputs)

    clear_btn.click(clear_history, None, chat_outputs, queue=False)

    # Regenerate: blank the last reply, then stream a new one.
    regenerate_btn.click(
        delete_text,
        [state, image_process_mode],
        chat_outputs,
    ).then(generate, generate_inputs, chat_outputs)

    # Pressing ENTER in the textbox and clicking Send run the same chain:
    # record the user turn, then stream the reply.
    textbox.submit(
        add_text,
        [state, imagebox, textbox, image_process_mode],
        chat_outputs,
    ).then(generate, generate_inputs, chat_outputs)

    submit_btn.click(
        add_text,
        [state, imagebox, textbox, image_process_mode],
        chat_outputs,
    ).then(generate, generate_inputs, chat_outputs)
306
 
307
# Enable the request queue and start serving the UI.
# The original last line fused `).launch()` with a leftover
# `tokenizer, model, ... = load_pretrained_model(...)` call from the previous
# version of this file, which is a syntax error. The leftover is dropped: the
# model is already loaded once near the top of the module with the same
# arguments.
demo.queue(
    status_update_rate=10,
    api_open=False,
).launch()
313