Update app.py
app.py CHANGED
```diff
@@ -136,64 +136,85 @@ model = AutoModel.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-3B-R-beta", trust_remote_code=True, use_fast=False)

 @spaces.GPU
-def chat(message, history):
-    if len(history) != 0 and len(message["files"]) != 0:
-        return """Chúng tôi hiện chỉ hổ trợ 1 ảnh ở đầu ngữ cảnh! Vui lòng tạo mới cuộc trò chuyện.
-We currently only support one image at the start of the context! Please start a new conversation."""
-    elif len(history) == 0 and len(message["files"]) == 0:
-        pixel_values = None
-    elif history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
-        test_image = history[0][0][0]
-        pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
-    else:
-        pixel_values = None
-            if i == 0 and start_index == 1:
-                conv_history.append(tuple(['<image>\n'+chat_pair[0],chat_pair[1]]))
-            else:
-                conv_history.append(tuple(chat_pair))
-    # return response
-    buffer = ""
-    for new_text in response:
-        buffer += new_text
-        generated_text_without_prompt = buffer[:]
-        time.sleep(0.02)
-        yield generated_text_without_prompt
+def chat(message, history, think_mode):
+    if not think_mode:
+        print("history", history)
+        print("message", message)
+
+        if len(history) != 0 and len(message["files"]) != 0:
+            return """Chúng tôi hiện chỉ hổ trợ 1 ảnh ở đầu ngữ cảnh! Vui lòng tạo mới cuộc trò chuyện.
+We currently only support one image at the start of the context! Please start a new conversation."""
+
+        if len(history) == 0 and len(message["files"]) != 0:
+            if "path" in message["files"][0]:
+                test_image = message["files"][0]["path"]
+            else:
+                test_image = message["files"][0]
+            pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
+        elif len(history) == 0 and len(message["files"]) == 0:
+            pixel_values = None
+        elif history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
+            test_image = history[0][0][0]
+            pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).cuda()
+        else:
+            pixel_values = None
+
+        generation_config = dict(max_new_tokens=700, do_sample=False, num_beams=3, repetition_penalty=2.5)
+
+        if len(history) == 0:
+            if pixel_values is not None:
+                question = '<image>\n'+message["text"]
+            else:
+                question = message["text"]
+            response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
+        else:
+            conv_history = []
+            if history[0][0][0] is not None and os.path.isfile(history[0][0][0]):
+                start_index = 1
+            else:
+                start_index = 0
+
+            for i, chat_pair in enumerate(history[start_index:]):
+                if i == 0 and start_index == 1:
+                    conv_history.append(tuple(['<image>\n'+chat_pair[0], chat_pair[1]]))
+                else:
+                    conv_history.append(tuple(chat_pair))
+
+            print("conv_history", conv_history)
+            question = message["text"]
+            response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=conv_history, return_history=True)
+
+        print(f'User: {question}\nAssistant: {response}')
+
+        # return response
+        buffer = ""
+        for new_text in response:
+            buffer += new_text
+            generated_text_without_prompt = buffer[:]
+            time.sleep(0.02)
+            yield generated_text_without_prompt
+    else:
+        buffer = ""
+        thinking = """🔄 Model is analyzing the data...\n
+I am processing your request carefully. First, I need to understand the question clearly.
+Then, I retrieve relevant information and analyze different possibilities.
+Finally, I generate a structured response that best fits your input.
+\nThis process ensures that I provide the most accurate and meaningful answer possible.
+"""
+
+        accumulated_text = "💡 **Thinking process:** *(Click to expand)*\n\n"
+        accumulated_text += "<pre><code>\n"
+
+        temp_text = ""
+        for char in thinking:
+            temp_text += char
+            yield accumulated_text + temp_text + "\n</code></pre>\n\n---\n"
+            time.sleep(0.02)
+
+        accumulated_text += temp_text + "\n</code></pre>\n\n---\n"

 CSS ="""
 #component-10 {
```
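Both branches of the new `chat` stream their output the same way: the full reply already exists as a string (`model.chat` is not incremental here), and the generator yields a growing prefix of it with a short `time.sleep` so Gradio animates a typing effect. A minimal, framework-free sketch of that pattern (the names below are illustrative, not from app.py):

```python
import time

def stream_text(full_reply: str, delay: float = 0.02):
    """Yield ever-longer prefixes of full_reply, one character per step.

    Gradio treats a generator handler as a stream: each yielded value
    replaces the currently displayed message, producing a typing effect.
    """
    buffer = ""
    for ch in full_reply:
        buffer += ch
        time.sleep(delay)  # pacing only; remove for instant output
        yield buffer

# Usage: print the growing prefixes instead of sending them to a UI.
for partial in stream_text("Xin chào!", delay=0.0):
    print(partial)
```

One caveat in the committed version: the early `return """Chúng tôi..."""` sits inside a function that also contains `yield`, so it is a generator, and a `return` in a generator ends the stream without emitting the value; the bilingual warning would likely never reach the UI unless it were yielded instead.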
```diff
@@ -250,7 +271,16 @@ button.svelte-1lcyrx4[aria-label="user's message: a file of type image/jpeg, "]
     margin-bottom: var(--spacing-lg);
 }

-#think-button {
+#think-button {
+    position: absolute;
+    bottom: 20px; /* moved down another 10px */
+    left: 10px;
+    font-size: 12px;
+    padding: 5px 10px;
+    border-radius: 10px; /* rounded corners */
+    background-color: #f4f4f4; /* light background */
+    border: 1px solid #ccc; /* subtle border */
+}
 """

 js = """
```
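The new `#think-button` rule only takes effect because of two hooks visible elsewhere in this diff: the button is created with `elem_id="think-button"`, and the stylesheet is passed to `gr.Blocks(css=CSS, ...)`. A self-contained sketch of that mechanism (a standalone example, not the Space's actual layout):

```python
import gradio as gr

# Gradio exposes elem_id as the component's DOM id, so a CSS id selector
# can target exactly one component.
CSS = """
#think-button {
    border-radius: 10px;
    background-color: #f4f4f4;
    border: 1px solid #ccc;
}
"""

with gr.Blocks(css=CSS) as demo:
    gr.Button("🧠Think", elem_id="think-button", variant="secondary")

if __name__ == "__main__":
    demo.launch()
```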
```diff
@@ -290,7 +320,7 @@ demo = gr.Blocks(css=CSS,js=js, theme='NoCrypt/miku')
 # Function to toggle Think Mode on/off
 def toggle_think_mode(current_state):
     new_state = not current_state
-    button_label = "💡
+    button_label = "🧠Think💡" if new_state else "🧠Think"
     return new_state, button_label

```
```diff
@@ -315,7 +345,7 @@ with demo:
     )

     with gr.Row(variant="compact"):
-        think_button = gr.Button("
+        think_button = gr.Button("🧠Think", elem_id="think-button", variant="secondary")

     # On click, toggle the think_mode state and swap the button label
     think_button.click(toggle_think_mode, inputs=[think_mode], outputs=[think_mode, think_button])
```
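Neither hunk shows where `think_mode` is created or how `chat` receives it as a third argument. A plausible wiring, consistent with the calls that do appear in the diff, is sketched below; treat the `gr.State(False)` creation and the `additional_inputs` hookup as assumptions, not part of this commit:

```python
import gradio as gr

def toggle_think_mode(current_state):
    # Same shape as the committed function: new state + new button label.
    new_state = not current_state
    return new_state, ("🧠Think💡" if new_state else "🧠Think")

def chat(message, history, think_mode):
    # Stand-in for the real handler; just echoes and flags the mode.
    return ("[think] " if think_mode else "") + message["text"]

with gr.Blocks() as demo:
    think_mode = gr.State(False)  # assumed: the hidden state behind the button
    gr.ChatInterface(chat, multimodal=True, additional_inputs=[think_mode])
    with gr.Row(variant="compact"):
        think_button = gr.Button("🧠Think", elem_id="think-button", variant="secondary")
    # Returning a plain string to a Button output replaces its label.
    think_button.click(toggle_think_mode, inputs=[think_mode], outputs=[think_mode, think_button])

demo.launch()
```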