Spaces:
Sleeping
Sleeping
baohuynhbk14
committed on
Commit
·
b30d21d
1
Parent(s):
b41e98c
Add image processing functions and update error messages in utils.py
Browse files- __pycache__/constants.cpython-39.pyc +0 -0
- __pycache__/conversation.cpython-39.pyc +0 -0
- __pycache__/models.cpython-39.pyc +0 -0
- __pycache__/utils.cpython-39.pyc +0 -0
- app.py +80 -61
- conversation.py +18 -5
- logs/2025-01-15-conv.json +9 -0
- logs/2025-01-15-conv.json.lock +0 -0
- logs/gradio_web_server.log +0 -0
- logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg +0 -0
- models.py +112 -0
- utils.py +1 -1
__pycache__/constants.cpython-39.pyc
ADDED
Binary file (722 Bytes). View file
|
|
__pycache__/conversation.cpython-39.pyc
ADDED
Binary file (8.63 kB). View file
|
|
__pycache__/models.cpython-39.pyc
ADDED
Binary file (3.76 kB). View file
|
|
__pycache__/utils.cpython-39.pyc
ADDED
Binary file (4.92 kB). View file
|
|
app.py
CHANGED
@@ -7,14 +7,14 @@ import os
|
|
7 |
import time
|
8 |
import hashlib
|
9 |
import re
|
10 |
-
|
11 |
import gradio as gr
|
12 |
import requests
|
13 |
import random
|
14 |
from filelock import FileLock
|
15 |
from io import BytesIO
|
16 |
from PIL import Image, ImageDraw, ImageFont
|
17 |
-
|
18 |
from constants import LOGDIR
|
19 |
from utils import (
|
20 |
build_logger,
|
@@ -25,7 +25,8 @@ from utils import (
|
|
25 |
get_log_filename,
|
26 |
)
|
27 |
from threading import Thread
|
28 |
-
import
|
|
|
29 |
from conversation import Conversation
|
30 |
from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer
|
31 |
|
@@ -166,6 +167,7 @@ def add_text(state, message, system_prompt, request: gr.Request):
|
|
166 |
) * 5
|
167 |
|
168 |
model_name = "5CD-AI/Vintern-1B-v3_5"
|
|
|
169 |
model = AutoModel.from_pretrained(
|
170 |
model_name,
|
171 |
torch_dtype=torch.bfloat16,
|
@@ -196,7 +198,6 @@ def http_bot(
|
|
196 |
) + (no_change_btn,) * 5
|
197 |
return
|
198 |
|
199 |
-
# No available worker
|
200 |
if model is None:
|
201 |
# state.messages[-1][-1] = server_error_msg
|
202 |
state.update_message(Conversation.ASSISTANT, server_error_msg)
|
@@ -225,16 +226,33 @@ def http_bot(
|
|
225 |
try:
|
226 |
# Stream output
|
227 |
# response = requests.post(worker_addr, json=pload, headers=headers, stream=True, timeout=300)
|
228 |
-
|
229 |
-
|
230 |
-
)
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
buffer = ""
|
237 |
-
for new_text in
|
238 |
buffer += new_text
|
239 |
# Remove <|im_end|> or similar tokens from the output
|
240 |
buffer = buffer.replace("<|im_end|>", "")
|
@@ -247,6 +265,8 @@ def http_bot(
|
|
247 |
) + (disable_btn,) * 5
|
248 |
|
249 |
except Exception as e:
|
|
|
|
|
250 |
state.update_message(Conversation.ASSISTANT, server_error_msg, None)
|
251 |
yield (
|
252 |
state,
|
@@ -289,20 +309,19 @@ def http_bot(
|
|
289 |
|
290 |
# <h1 style="font-size: 28px; font-weight: bold;">Expanding Performance Boundaries of Open-Source Multimodal Models with Model, Data, and Test-Time Scaling</h1>
|
291 |
title_html = """
|
292 |
-
<
|
293 |
-
<
|
294 |
-
<
|
295 |
-
<a href="https://huggingface.co/papers/
|
296 |
-
<a href="https://
|
297 |
-
|
298 |
-
<a href="https://internvl.opengvlab.com/">[🗨️ Official Demo]</a>
|
299 |
"""
|
300 |
|
301 |
|
302 |
tos_markdown = """
|
303 |
### Terms of use
|
304 |
By using this service, users are required to agree to the following terms:
|
305 |
-
|
306 |
Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
|
307 |
For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
|
308 |
"""
|
@@ -332,45 +351,45 @@ block_css = """
|
|
332 |
}
|
333 |
"""
|
334 |
|
335 |
-
js = """
|
336 |
-
function createWaveAnimation() {
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
}
|
372 |
-
|
373 |
-
"""
|
374 |
|
375 |
|
376 |
def build_demo():
|
@@ -472,7 +491,7 @@ def build_demo():
|
|
472 |
with gr.Column(scale=8):
|
473 |
chatbot = gr.Chatbot(
|
474 |
elem_id="chatbot",
|
475 |
-
label="
|
476 |
height=580,
|
477 |
show_copy_button=True,
|
478 |
show_share_button=True,
|
|
|
7 |
import time
|
8 |
import hashlib
|
9 |
import re
|
10 |
+
import torch
|
11 |
import gradio as gr
|
12 |
import requests
|
13 |
import random
|
14 |
from filelock import FileLock
|
15 |
from io import BytesIO
|
16 |
from PIL import Image, ImageDraw, ImageFont
|
17 |
+
from models import load_image
|
18 |
from constants import LOGDIR
|
19 |
from utils import (
|
20 |
build_logger,
|
|
|
25 |
get_log_filename,
|
26 |
)
|
27 |
from threading import Thread
|
28 |
+
import traceback
|
29 |
+
# import torch
|
30 |
from conversation import Conversation
|
31 |
from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer
|
32 |
|
|
|
167 |
) * 5
|
168 |
|
169 |
model_name = "5CD-AI/Vintern-1B-v3_5"
|
170 |
+
model = None
|
171 |
model = AutoModel.from_pretrained(
|
172 |
model_name,
|
173 |
torch_dtype=torch.bfloat16,
|
|
|
198 |
) + (no_change_btn,) * 5
|
199 |
return
|
200 |
|
|
|
201 |
if model is None:
|
202 |
# state.messages[-1][-1] = server_error_msg
|
203 |
state.update_message(Conversation.ASSISTANT, server_error_msg)
|
|
|
226 |
try:
|
227 |
# Stream output
|
228 |
# response = requests.post(worker_addr, json=pload, headers=headers, stream=True, timeout=300)
|
229 |
+
print(f"all_image_paths: {all_image_paths}")
|
230 |
+
|
231 |
+
pixel_values = load_image(all_image_paths[0], max_num=6).to(torch.bfloat16)
|
232 |
+
print(f"pixel_values: {pixel_values}")
|
233 |
+
generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.5)
|
234 |
+
message = state.get_user_message(source=state.USER)
|
235 |
+
print(f"######################")
|
236 |
+
print(f"message: {message}")
|
237 |
+
if pixel_values is not None:
|
238 |
+
question = '<image>\n'+message
|
239 |
+
else:
|
240 |
+
question = message
|
241 |
+
response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
|
242 |
+
print(f"AI response: {response}")
|
243 |
+
|
244 |
+
|
245 |
+
# streamer = TextIteratorStreamer(
|
246 |
+
# tokenizer, skip_prompt=True, skip_special_tokens=True
|
247 |
+
# )
|
248 |
+
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
|
249 |
+
|
250 |
+
# thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
251 |
+
# thread.start()
|
252 |
+
|
253 |
+
# response = "This is a test response"
|
254 |
buffer = ""
|
255 |
+
for new_text in response:
|
256 |
buffer += new_text
|
257 |
# Remove <|im_end|> or similar tokens from the output
|
258 |
buffer = buffer.replace("<|im_end|>", "")
|
|
|
265 |
) + (disable_btn,) * 5
|
266 |
|
267 |
except Exception as e:
|
268 |
+
logger.error(f"Error in http_bot: {e}")
|
269 |
+
traceback.print_exc()
|
270 |
state.update_message(Conversation.ASSISTANT, server_error_msg, None)
|
271 |
yield (
|
272 |
state,
|
|
|
309 |
|
310 |
# <h1 style="font-size: 28px; font-weight: bold;">Expanding Performance Boundaries of Open-Source Multimodal Models with Model, Data, and Test-Time Scaling</h1>
|
311 |
title_html = """
|
312 |
+
<div style="text-align: center;">
|
313 |
+
<img src="https://lh3.googleusercontent.com/pw/AP1GczMmW-aFQ4dNaR_LCAllh4UZLLx9fTZ1ITHeGVMWx-1bwlIWz4VsWJSGb3_9C7CQfvboqJH41y2Sbc5ToC9ZmKeV4-buf_DEevIMU0HtaLWgHAPOqBiIbG6LaE8CvDqniLZzvB9UX8TR_-YgvYzPFt2z=w1472-h832-s-no-gm?authuser=0" style="height: 100; width: 100%;">
|
314 |
+
<p>Vintern-1B: An Efficient Multimodal Large Language Model for Vietnamese</p>
|
315 |
+
<a href="https://huggingface.co/papers/2408.12480">[📖 Vintern Paper]</a>
|
316 |
+
<a href="https://huggingface.co/5CD-AI">[🤗 5CD-AI Huggingface]</a>
|
317 |
+
</div>
|
|
|
318 |
"""
|
319 |
|
320 |
|
321 |
tos_markdown = """
|
322 |
### Terms of use
|
323 |
By using this service, users are required to agree to the following terms:
|
324 |
+
It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
|
325 |
Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
|
326 |
For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
|
327 |
"""
|
|
|
351 |
}
|
352 |
"""
|
353 |
|
354 |
+
# js = """
|
355 |
+
# function createWaveAnimation() {
|
356 |
+
# const text = document.getElementById('text');
|
357 |
+
# var i = 0;
|
358 |
+
# setInterval(function() {
|
359 |
+
# const colors = [
|
360 |
+
# 'red, orange, yellow, green, blue, indigo, violet, purple',
|
361 |
+
# 'orange, yellow, green, blue, indigo, violet, purple, red',
|
362 |
+
# 'yellow, green, blue, indigo, violet, purple, red, orange',
|
363 |
+
# 'green, blue, indigo, violet, purple, red, orange, yellow',
|
364 |
+
# 'blue, indigo, violet, purple, red, orange, yellow, green',
|
365 |
+
# 'indigo, violet, purple, red, orange, yellow, green, blue',
|
366 |
+
# 'violet, purple, red, orange, yellow, green, blue, indigo',
|
367 |
+
# 'purple, red, orange, yellow, green, blue, indigo, violet',
|
368 |
+
# ];
|
369 |
+
# const angle = 45;
|
370 |
+
# const colorIndex = i % colors.length;
|
371 |
+
# text.style.background = `linear-gradient(${angle}deg, ${colors[colorIndex]})`;
|
372 |
+
# text.style.webkitBackgroundClip = 'text';
|
373 |
+
# text.style.backgroundClip = 'text';
|
374 |
+
# text.style.color = 'transparent';
|
375 |
+
# text.style.fontSize = '28px';
|
376 |
+
# text.style.width = 'auto';
|
377 |
+
# text.textContent = 'Vintern-1B';
|
378 |
+
# text.style.fontWeight = 'bold';
|
379 |
+
# i += 1;
|
380 |
+
# }, 200);
|
381 |
+
# const params = new URLSearchParams(window.location.search);
|
382 |
+
# url_params = Object.fromEntries(params);
|
383 |
+
# // console.log(url_params);
|
384 |
+
# // console.log('hello world...');
|
385 |
+
# // console.log(window.location.search);
|
386 |
+
# // console.log('hello world...');
|
387 |
+
# // alert(window.location.search)
|
388 |
+
# // alert(url_params);
|
389 |
+
# return url_params;
|
390 |
+
# }
|
391 |
+
|
392 |
+
# """
|
393 |
|
394 |
|
395 |
def build_demo():
|
|
|
491 |
with gr.Column(scale=8):
|
492 |
chatbot = gr.Chatbot(
|
493 |
elem_id="chatbot",
|
494 |
+
label="Vintern",
|
495 |
height=580,
|
496 |
show_copy_button=True,
|
497 |
show_share_button=True,
|
conversation.py
CHANGED
@@ -173,6 +173,15 @@ class Conversation:
|
|
173 |
images.append(image)
|
174 |
|
175 |
return images
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
def to_gradio_chatbot(self):
|
178 |
ret = []
|
@@ -231,12 +240,14 @@ class Conversation:
|
|
231 |
|
232 |
def update_message(self, role, content, image=None, idx=-1):
|
233 |
assert len(self.messages) > 0, "No message in the conversation."
|
234 |
-
|
235 |
idx = (idx + len(self.messages)) % len(self.messages)
|
236 |
-
|
237 |
-
assert (
|
238 |
-
|
239 |
-
), f"Role mismatch: {role} vs {self.messages[idx]['role']}"
|
|
|
|
|
240 |
|
241 |
self.messages[idx]["content"] = content
|
242 |
if image is not None:
|
@@ -245,6 +256,8 @@ class Conversation:
|
|
245 |
if not isinstance(image, list):
|
246 |
image = [image]
|
247 |
self.messages[idx]["image"].extend(image)
|
|
|
|
|
248 |
|
249 |
def return_last_message(self):
|
250 |
return self.messages[-1]["content"]
|
|
|
173 |
images.append(image)
|
174 |
|
175 |
return images
|
176 |
+
|
177 |
+
def get_user_message(self, source: Union[str, None] = None):
    """Return the content of the most recent user message.

    Args:
        source: Optional role filter; must be ``self.USER``,
            ``self.ASSISTANT`` or ``None``. Messages whose role differs
            from a non-empty ``source`` are skipped, so passing
            ``self.ASSISTANT`` can never match a user message.

    Returns:
        The ``"content"`` of the latest message whose role is
        ``self.USER``, or ``None`` when no message qualifies.

    Raises:
        AssertionError: If the conversation is empty or ``source`` is not
            one of the accepted values.
    """
    assert len(self.messages) > 0, "No message in the conversation."
    assert source in [self.USER, self.ASSISTANT, None], f"Invalid source: {source}"
    # Walk newest-to-oldest: in a multi-turn chat the question to answer is
    # the latest user turn, not the first one in the history.
    for msg in reversed(self.messages):
        if source and msg["role"] != source:
            continue
        if msg["role"] == self.USER:
            return msg["content"]
    # Explicit fall-through: no user message matched the filter.
    return None
|
185 |
|
186 |
def to_gradio_chatbot(self):
|
187 |
ret = []
|
|
|
240 |
|
241 |
def update_message(self, role, content, image=None, idx=-1):
|
242 |
assert len(self.messages) > 0, "No message in the conversation."
|
243 |
+
print(f"Messsage: {self.messages}")
|
244 |
idx = (idx + len(self.messages)) % len(self.messages)
|
245 |
+
|
246 |
+
# assert (
|
247 |
+
# self.messages[idx]["role"] == role
|
248 |
+
# ), f"Role mismatch: {role} vs {self.messages[idx]['role']}"
|
249 |
+
if role != Conversation.ASSISTANT and self.messages[idx]["role"] != role:
|
250 |
+
raise AssertionError(f"Role mismatch: {role} vs {self.messages[idx]['role']}")
|
251 |
|
252 |
self.messages[idx]["content"] = content
|
253 |
if image is not None:
|
|
|
256 |
if not isinstance(image, list):
|
257 |
image = [image]
|
258 |
self.messages[idx]["image"].extend(image)
|
259 |
+
|
260 |
+
print(f"Updated message: {self.messages}")
|
261 |
|
262 |
def return_last_message(self):
|
263 |
return self.messages[-1]["content"]
|
logs/2025-01-15-conv.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"tstamp": 1736901847.9869, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
|
2 |
+
{"tstamp": 1736901849.31, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
|
3 |
+
{"tstamp": 1736901850.7274, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
|
4 |
+
{"tstamp": 1736901851.5865, "like": true, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
|
5 |
+
{"tstamp": 1736901852.2976, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
|
6 |
+
{"tstamp": 1736901853.9731, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
|
7 |
+
{"tstamp": 1736901854.5329, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
|
8 |
+
{"tstamp": 1736901854.6853, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
|
9 |
+
{"tstamp": 1736903025.7072, "like": null, "model": "5CD-AI/Vintern-1B-v3_5", "start": 1736903024.824, "finish": 1736903024.824, "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "This is a test response"}]}, "images": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"], "ip": "127.0.0.1"}
|
logs/2025-01-15-conv.json.lock
ADDED
File without changes
|
logs/gradio_web_server.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg
ADDED
models.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
import torchvision.transforms as T
|
5 |
+
from torchvision.transforms.functional import InterpolationMode
|
6 |
+
from PIL import Image
|
7 |
+
from PIL import Image, ExifTags
|
8 |
+
|
9 |
+
IMAGENET_MEAN = (0.485, 0.456, 0.406)
|
10 |
+
IMAGENET_STD = (0.229, 0.224, 0.225)
|
11 |
+
|
12 |
+
def build_transform(input_size):
    """Build the preprocessing pipeline applied to each image tile.

    The pipeline converts the image to RGB when its mode differs, resizes it
    to an ``input_size`` x ``input_size`` square with bicubic interpolation,
    converts it to a tensor and normalizes it with ``IMAGENET_MEAN`` and
    ``IMAGENET_STD``.

    Args:
        input_size: Side length, in pixels, of the square output.

    Returns:
        The composed torchvision transform.
    """
    def _ensure_rgb(img):
        # Leave images that are already RGB untouched.
        return img if img.mode == 'RGB' else img.convert('RGB')

    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)
|
21 |
+
|
22 |
+
def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
    """Pick the grid whose width/height ratio is closest to ``aspect_ratio``.

    Args:
        aspect_ratio: Width divided by height of the source image.
        target_ratios: Iterable of ``(columns, rows)`` candidates, scanned in
            the order given.
        width: Source image width in pixels.
        height: Source image height in pixels.
        image_size: Side length of one square tile in pixels.

    Returns:
        The chosen ``(columns, rows)`` candidate, or ``(1, 1)`` when
        ``target_ratios`` is empty.
    """
    chosen = (1, 1)
    smallest_gap = float('inf')
    pixel_area = width * height
    for candidate in target_ratios:
        gap = abs(aspect_ratio - candidate[0] / candidate[1])
        if gap < smallest_gap:
            smallest_gap = gap
            chosen = candidate
            continue
        # On an exact tie, move to the later candidate only when the image
        # covers more than half the pixel area of that candidate's grid.
        if gap == smallest_gap and pixel_area > 0.5 * image_size * image_size * candidate[0] * candidate[1]:
            chosen = candidate
    return chosen
|
36 |
+
|
37 |
+
def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
    """Resize an image onto a tile grid and cut it into square tiles.

    Args:
        image: Image object exposing ``size``, ``resize`` and ``crop``.
        min_num: Minimum number of tiles a candidate grid may contain.
        max_num: Maximum number of tiles a candidate grid may contain.
        image_size: Side length of each square tile in pixels.
        use_thumbnail: When true and more than one tile was produced, append
            the whole image resized to a single tile.

    Returns:
        List of tiles in row-major order, optionally followed by the
        thumbnail.
    """
    src_width, src_height = image.size
    src_ratio = src_width / src_height

    # Enumerate every (columns, rows) grid whose tile count lies within
    # [min_num, max_num], then order the grids by tile count.
    grids = set(
        (i, j)
        for n in range(min_num, max_num + 1)
        for i in range(1, n + 1)
        for j in range(1, n + 1)
        if min_num <= i * j <= max_num
    )
    grids = sorted(grids, key=lambda grid: grid[0] * grid[1])

    # Choose the grid that best matches the source aspect ratio.
    best_grid = find_closest_aspect_ratio(
        src_ratio, grids, src_width, src_height, image_size)
    cols, rows = best_grid[0], best_grid[1]

    canvas_width = image_size * cols
    canvas_height = image_size * rows
    tile_count = cols * rows

    # Stretch the image onto the grid canvas, then slice it row by row.
    canvas = image.resize((canvas_width, canvas_height))
    tiles_per_row = canvas_width // image_size
    tiles = []
    for index in range(tile_count):
        row, col = divmod(index, tiles_per_row)
        left = col * image_size
        top = row * image_size
        tiles.append(canvas.crop((left, top, left + image_size, top + image_size)))
    assert len(tiles) == tile_count

    if use_thumbnail and len(tiles) != 1:
        tiles.append(image.resize((image_size, image_size)))
    return tiles
|
74 |
+
|
75 |
+
def correct_image_orientation(image_path):
    """Open an image and apply the rotation/flip stored in its EXIF data.

    Args:
        image_path: Path (or file object) accepted by ``Image.open``.

    Returns:
        The opened image, transposed according to its EXIF Orientation tag
        when one is present; otherwise the image as opened.
    """
    # Local import so this block does not depend on a change to the
    # module-level import list.
    from PIL import ImageOps

    # Open the image.
    image = Image.open(image_path)

    # Apply the EXIF orientation, if any. The public helper covers all
    # orientation values (including the mirrored ones) and works for formats
    # that do not provide the private ``_getexif`` method.
    try:
        transposed = ImageOps.exif_transpose(image)
        if transposed is not None:
            image = transposed
    except Exception as e:
        # Best effort: a broken EXIF block must not prevent loading the image.
        print("Không thể xử lý Exif:", e)

    return image
|
97 |
+
|
98 |
+
def load_image(image_file, input_size=448, max_num=12):
    """Load an image file and turn it into a stacked tensor of tiles.

    The image is opened with its orientation corrected, converted to RGB,
    split into tiles (thumbnail enabled) and each tile is passed through the
    transform from ``build_transform`` before stacking.

    Args:
        image_file: Path of the image to load.
        input_size: Side length of each square tile in pixels.
        max_num: Maximum number of tiles passed to ``dynamic_preprocess``.

    Returns:
        The per-tile tensors stacked along a new leading dimension, or
        ``None`` if any step raised an exception (the error is printed).
    """
    pixel_values = None
    try:
        print("Loading image:", image_file)
        rgb_image = correct_image_orientation(image_file).convert('RGB')
        print("Image size:", rgb_image.size)
        to_tensor = build_transform(input_size=input_size)
        tiles = dynamic_preprocess(rgb_image, image_size=input_size, use_thumbnail=True, max_num=max_num)
        print("Number of images:", len(tiles))
        pixel_values = torch.stack([to_tensor(tile) for tile in tiles])
        print("Image loaded successfully.")
    except Exception as e:
        # Callers receive None on any failure instead of an exception.
        print("Error loading image:", e)
        pixel_values = None
    return pixel_values
|
utils.py
CHANGED
@@ -12,7 +12,7 @@ from constants import LOGDIR
|
|
12 |
import datetime
|
13 |
|
14 |
server_error_msg = (
|
15 |
-
"**
|
16 |
)
|
17 |
moderation_msg = (
|
18 |
"YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
|
|
|
12 |
import datetime
|
13 |
|
14 |
server_error_msg = (
|
15 |
+
"**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
|
16 |
)
|
17 |
moderation_msg = (
|
18 |
"YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
|