baohuynhbk14 committed on
Commit
b30d21d
·
1 Parent(s): b41e98c

Add image processing functions and update error messages in utils.py

Browse files
__pycache__/constants.cpython-39.pyc ADDED
Binary file (722 Bytes). View file
 
__pycache__/conversation.cpython-39.pyc ADDED
Binary file (8.63 kB). View file
 
__pycache__/models.cpython-39.pyc ADDED
Binary file (3.76 kB). View file
 
__pycache__/utils.cpython-39.pyc ADDED
Binary file (4.92 kB). View file
 
app.py CHANGED
@@ -7,14 +7,14 @@ import os
7
  import time
8
  import hashlib
9
  import re
10
-
11
  import gradio as gr
12
  import requests
13
  import random
14
  from filelock import FileLock
15
  from io import BytesIO
16
  from PIL import Image, ImageDraw, ImageFont
17
-
18
  from constants import LOGDIR
19
  from utils import (
20
  build_logger,
@@ -25,7 +25,8 @@ from utils import (
25
  get_log_filename,
26
  )
27
  from threading import Thread
28
- import torch
 
29
  from conversation import Conversation
30
  from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer
31
 
@@ -166,6 +167,7 @@ def add_text(state, message, system_prompt, request: gr.Request):
166
  ) * 5
167
 
168
  model_name = "5CD-AI/Vintern-1B-v3_5"
 
169
  model = AutoModel.from_pretrained(
170
  model_name,
171
  torch_dtype=torch.bfloat16,
@@ -196,7 +198,6 @@ def http_bot(
196
  ) + (no_change_btn,) * 5
197
  return
198
 
199
- # No available worker
200
  if model is None:
201
  # state.messages[-1][-1] = server_error_msg
202
  state.update_message(Conversation.ASSISTANT, server_error_msg)
@@ -225,16 +226,33 @@ def http_bot(
225
  try:
226
  # Stream output
227
  # response = requests.post(worker_addr, json=pload, headers=headers, stream=True, timeout=300)
228
- streamer = TextIteratorStreamer(
229
- tokenizer, skip_prompt=True, skip_special_tokens=True
230
- )
231
- generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
232
-
233
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
234
- thread.start()
235
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  buffer = ""
237
- for new_text in streamer:
238
  buffer += new_text
239
  # Remove <|im_end|> or similar tokens from the output
240
  buffer = buffer.replace("<|im_end|>", "")
@@ -247,6 +265,8 @@ def http_bot(
247
  ) + (disable_btn,) * 5
248
 
249
  except Exception as e:
 
 
250
  state.update_message(Conversation.ASSISTANT, server_error_msg, None)
251
  yield (
252
  state,
@@ -289,20 +309,19 @@ def http_bot(
289
 
290
  # <h1 style="font-size: 28px; font-weight: bold;">Expanding Performance Boundaries of Open-Source Multimodal Models with Model, Data, and Test-Time Scaling</h1>
291
  title_html = """
292
- <img src="https://internvl.opengvlab.com/assets/logo-47b364d3.jpg" style="width: 280px; height: 70px;">
293
- <p>Vintern-1B: Expanding Performance Boundaries of Open-Source Multimodal Models with Model, Data, and Test-Time Scaling</p>
294
- <a href="https://internvl.github.io/blog/2024-12-05-InternVL-2.5/">[🆕 InternVL Blog]</a>
295
- <a href="https://huggingface.co/papers/2412.05271">[📖 InternVL Paper]</a>
296
- <a href="https://github.com/OpenGVLab/InternVL">[🌟 Github]</a><br>
297
- <a href="https://internvl.readthedocs.io/en/latest/">[📜 Document]</a>
298
- <a href="https://internvl.opengvlab.com/">[🗨️ Official Demo]</a>
299
  """
300
 
301
 
302
  tos_markdown = """
303
  ### Terms of use
304
  By using this service, users are required to agree to the following terms:
305
- The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
306
  Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
307
  For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
308
  """
@@ -332,45 +351,45 @@ block_css = """
332
  }
333
  """
334
 
335
- js = """
336
- function createWaveAnimation() {
337
- const text = document.getElementById('text');
338
- var i = 0;
339
- setInterval(function() {
340
- const colors = [
341
- 'red, orange, yellow, green, blue, indigo, violet, purple',
342
- 'orange, yellow, green, blue, indigo, violet, purple, red',
343
- 'yellow, green, blue, indigo, violet, purple, red, orange',
344
- 'green, blue, indigo, violet, purple, red, orange, yellow',
345
- 'blue, indigo, violet, purple, red, orange, yellow, green',
346
- 'indigo, violet, purple, red, orange, yellow, green, blue',
347
- 'violet, purple, red, orange, yellow, green, blue, indigo',
348
- 'purple, red, orange, yellow, green, blue, indigo, violet',
349
- ];
350
- const angle = 45;
351
- const colorIndex = i % colors.length;
352
- text.style.background = `linear-gradient(${angle}deg, ${colors[colorIndex]})`;
353
- text.style.webkitBackgroundClip = 'text';
354
- text.style.backgroundClip = 'text';
355
- text.style.color = 'transparent';
356
- text.style.fontSize = '28px';
357
- text.style.width = 'auto';
358
- text.textContent = 'Vintern-1B';
359
- text.style.fontWeight = 'bold';
360
- i += 1;
361
- }, 200);
362
- const params = new URLSearchParams(window.location.search);
363
- url_params = Object.fromEntries(params);
364
- // console.log(url_params);
365
- // console.log('hello world...');
366
- // console.log(window.location.search);
367
- // console.log('hello world...');
368
- // alert(window.location.search)
369
- // alert(url_params);
370
- return url_params;
371
- }
372
-
373
- """
374
 
375
 
376
  def build_demo():
@@ -472,7 +491,7 @@ def build_demo():
472
  with gr.Column(scale=8):
473
  chatbot = gr.Chatbot(
474
  elem_id="chatbot",
475
- label="InternVL",
476
  height=580,
477
  show_copy_button=True,
478
  show_share_button=True,
 
7
  import time
8
  import hashlib
9
  import re
10
+ import torch
11
  import gradio as gr
12
  import requests
13
  import random
14
  from filelock import FileLock
15
  from io import BytesIO
16
  from PIL import Image, ImageDraw, ImageFont
17
+ from models import load_image
18
  from constants import LOGDIR
19
  from utils import (
20
  build_logger,
 
25
  get_log_filename,
26
  )
27
  from threading import Thread
28
+ import traceback
29
+ # import torch
30
  from conversation import Conversation
31
  from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer
32
 
 
167
  ) * 5
168
 
169
  model_name = "5CD-AI/Vintern-1B-v3_5"
170
+ model = None
171
  model = AutoModel.from_pretrained(
172
  model_name,
173
  torch_dtype=torch.bfloat16,
 
198
  ) + (no_change_btn,) * 5
199
  return
200
 
 
201
  if model is None:
202
  # state.messages[-1][-1] = server_error_msg
203
  state.update_message(Conversation.ASSISTANT, server_error_msg)
 
226
  try:
227
  # Stream output
228
  # response = requests.post(worker_addr, json=pload, headers=headers, stream=True, timeout=300)
229
+ print(f"all_image_paths: {all_image_paths}")
230
+
231
+ pixel_values = load_image(all_image_paths[0], max_num=6).to(torch.bfloat16)
232
+ print(f"pixel_values: {pixel_values}")
233
+ generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.5)
234
+ message = state.get_user_message(source=state.USER)
235
+ print(f"######################")
236
+ print(f"message: {message}")
237
+ if pixel_values is not None:
238
+ question = '<image>\n'+message
239
+ else:
240
+ question = message
241
+ response, conv_history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
242
+ print(f"AI response: {response}")
243
+
244
+
245
+ # streamer = TextIteratorStreamer(
246
+ # tokenizer, skip_prompt=True, skip_special_tokens=True
247
+ # )
248
+ # generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
249
+
250
+ # thread = Thread(target=model.generate, kwargs=generation_kwargs)
251
+ # thread.start()
252
+
253
+ # response = "This is a test response"
254
  buffer = ""
255
+ for new_text in response:
256
  buffer += new_text
257
  # Remove <|im_end|> or similar tokens from the output
258
  buffer = buffer.replace("<|im_end|>", "")
 
265
  ) + (disable_btn,) * 5
266
 
267
  except Exception as e:
268
+ logger.error(f"Error in http_bot: {e}")
269
+ traceback.print_exc()
270
  state.update_message(Conversation.ASSISTANT, server_error_msg, None)
271
  yield (
272
  state,
 
309
 
310
  # <h1 style="font-size: 28px; font-weight: bold;">Expanding Performance Boundaries of Open-Source Multimodal Models with Model, Data, and Test-Time Scaling</h1>
311
  title_html = """
312
+ <div style="text-align: center;">
313
+ <img src="https://lh3.googleusercontent.com/pw/AP1GczMmW-aFQ4dNaR_LCAllh4UZLLx9fTZ1ITHeGVMWx-1bwlIWz4VsWJSGb3_9C7CQfvboqJH41y2Sbc5ToC9ZmKeV4-buf_DEevIMU0HtaLWgHAPOqBiIbG6LaE8CvDqniLZzvB9UX8TR_-YgvYzPFt2z=w1472-h832-s-no-gm?authuser=0" style="height: 100; width: 100%;">
314
+ <p>Vintern-1B: An Efficient Multimodal Large Language Model for Vietnamese</p>
315
+ <a href="https://huggingface.co/papers/2408.12480">[📖 Vintern Paper]</a>
316
+ <a href="https://huggingface.co/5CD-AI">[🤗 5CD-AI Huggingface]</a>
317
+ </div>
 
318
  """
319
 
320
 
321
  tos_markdown = """
322
  ### Terms of use
323
  By using this service, users are required to agree to the following terms:
324
+ It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
325
  Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
326
  For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
327
  """
 
351
  }
352
  """
353
 
354
+ # js = """
355
+ # function createWaveAnimation() {
356
+ # const text = document.getElementById('text');
357
+ # var i = 0;
358
+ # setInterval(function() {
359
+ # const colors = [
360
+ # 'red, orange, yellow, green, blue, indigo, violet, purple',
361
+ # 'orange, yellow, green, blue, indigo, violet, purple, red',
362
+ # 'yellow, green, blue, indigo, violet, purple, red, orange',
363
+ # 'green, blue, indigo, violet, purple, red, orange, yellow',
364
+ # 'blue, indigo, violet, purple, red, orange, yellow, green',
365
+ # 'indigo, violet, purple, red, orange, yellow, green, blue',
366
+ # 'violet, purple, red, orange, yellow, green, blue, indigo',
367
+ # 'purple, red, orange, yellow, green, blue, indigo, violet',
368
+ # ];
369
+ # const angle = 45;
370
+ # const colorIndex = i % colors.length;
371
+ # text.style.background = `linear-gradient(${angle}deg, ${colors[colorIndex]})`;
372
+ # text.style.webkitBackgroundClip = 'text';
373
+ # text.style.backgroundClip = 'text';
374
+ # text.style.color = 'transparent';
375
+ # text.style.fontSize = '28px';
376
+ # text.style.width = 'auto';
377
+ # text.textContent = 'Vintern-1B';
378
+ # text.style.fontWeight = 'bold';
379
+ # i += 1;
380
+ # }, 200);
381
+ # const params = new URLSearchParams(window.location.search);
382
+ # url_params = Object.fromEntries(params);
383
+ # // console.log(url_params);
384
+ # // console.log('hello world...');
385
+ # // console.log(window.location.search);
386
+ # // console.log('hello world...');
387
+ # // alert(window.location.search)
388
+ # // alert(url_params);
389
+ # return url_params;
390
+ # }
391
+
392
+ # """
393
 
394
 
395
  def build_demo():
 
491
  with gr.Column(scale=8):
492
  chatbot = gr.Chatbot(
493
  elem_id="chatbot",
494
+ label="Vintern",
495
  height=580,
496
  show_copy_button=True,
497
  show_share_button=True,
conversation.py CHANGED
@@ -173,6 +173,15 @@ class Conversation:
173
  images.append(image)
174
 
175
  return images
 
 
 
 
 
 
 
 
 
176
 
177
  def to_gradio_chatbot(self):
178
  ret = []
@@ -231,12 +240,14 @@ class Conversation:
231
 
232
  def update_message(self, role, content, image=None, idx=-1):
233
  assert len(self.messages) > 0, "No message in the conversation."
234
-
235
  idx = (idx + len(self.messages)) % len(self.messages)
236
-
237
- assert (
238
- self.messages[idx]["role"] == role
239
- ), f"Role mismatch: {role} vs {self.messages[idx]['role']}"
 
 
240
 
241
  self.messages[idx]["content"] = content
242
  if image is not None:
@@ -245,6 +256,8 @@ class Conversation:
245
  if not isinstance(image, list):
246
  image = [image]
247
  self.messages[idx]["image"].extend(image)
 
 
248
 
249
  def return_last_message(self):
250
  return self.messages[-1]["content"]
 
173
  images.append(image)
174
 
175
  return images
176
+
177
def get_user_message(self, source: Union[str, None] = None):
    """Return the content of the first message written by a given role.

    Args:
        source: Role to look up. Must be ``self.USER``, ``self.ASSISTANT``
            or ``None``; ``None`` is treated as ``self.USER``.

    Returns:
        The ``"content"`` value of the first message in ``self.messages``
        whose ``"role"`` equals the requested role, or ``None`` when no
        message has that role.

    Raises:
        AssertionError: If the conversation holds no messages, or if
            ``source`` is not one of the accepted values.
    """
    # Raise explicitly instead of using ``assert`` so the validation is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not self.messages:
        raise AssertionError("No message in the conversation.")
    if source not in [self.USER, self.ASSISTANT, None]:
        raise AssertionError(f"Invalid source: {source}")

    # Previously the final comparison was hard-coded to self.USER, so a
    # request for self.ASSISTANT passed validation but could never match.
    wanted_role = source if source else self.USER

    # NOTE(review): this returns the FIRST matching message; confirm that
    # callers in multi-turn conversations do not expect the latest one.
    for msg in self.messages:
        if msg["role"] == wanted_role:
            return msg["content"]
    return None
185
 
186
  def to_gradio_chatbot(self):
187
  ret = []
 
240
 
241
  def update_message(self, role, content, image=None, idx=-1):
242
  assert len(self.messages) > 0, "No message in the conversation."
243
+ print(f"Messsage: {self.messages}")
244
  idx = (idx + len(self.messages)) % len(self.messages)
245
+
246
+ # assert (
247
+ # self.messages[idx]["role"] == role
248
+ # ), f"Role mismatch: {role} vs {self.messages[idx]['role']}"
249
+ if role != Conversation.ASSISTANT and self.messages[idx]["role"] != role:
250
+ raise AssertionError(f"Role mismatch: {role} vs {self.messages[idx]['role']}")
251
 
252
  self.messages[idx]["content"] = content
253
  if image is not None:
 
256
  if not isinstance(image, list):
257
  image = [image]
258
  self.messages[idx]["image"].extend(image)
259
+
260
+ print(f"Updated message: {self.messages}")
261
 
262
  def return_last_message(self):
263
  return self.messages[-1]["content"]
logs/2025-01-15-conv.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {"tstamp": 1736901847.9869, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
2
+ {"tstamp": 1736901849.31, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
3
+ {"tstamp": 1736901850.7274, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
4
+ {"tstamp": 1736901851.5865, "like": true, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
5
+ {"tstamp": 1736901852.2976, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
6
+ {"tstamp": 1736901853.9731, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
7
+ {"tstamp": 1736901854.5329, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
8
+ {"tstamp": 1736901854.6853, "like": false, "index": [0, 1], "model": "Vintern-1B-v3", "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"}]}, "ip": "127.0.0.1"}
9
+ {"tstamp": 1736903025.7072, "like": null, "model": "5CD-AI/Vintern-1B-v3_5", "start": 1736903024.824, "finish": 1736903024.824, "state": {"mandatory_system_message": "\u6211\u662f\u4e66\u751f\u00b7\u4e07\u8c61\uff0c\u82f1\u6587\u540d\u662fInternVL\uff0c\u662f\u7531\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u3001\u6e05\u534e\u5927\u5b66\u53ca\u591a\u5bb6\u5408\u4f5c\u5355\u4f4d\u8054\u5408\u5f00\u53d1\u7684\u591a\u6a21\u6001\u5927\u8bed\u8a00\u6a21\u578b\u3002", "system_message": "\u8bf7\u5c3d\u53ef\u80fd\u8be6\u7ec6\u5730\u56de\u7b54\u7528\u6237\u7684\u95ee\u9898\u3002", "roles": ["system", "user", "assistant"], "messages": [{"role": "user", "content": "Please help me analyze this picture.", "image": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"]}, {"role": "assistant", "content": "This is a test response"}]}, "images": ["logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg"], "ip": "127.0.0.1"}
logs/2025-01-15-conv.json.lock ADDED
File without changes
logs/gradio_web_server.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/serve_images/2025-01-15/d7ea81988546544ca773fc48dc9da837.jpg ADDED
models.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ import torch
4
+ import torchvision.transforms as T
5
+ from torchvision.transforms.functional import InterpolationMode
6
+ from PIL import Image
7
+ from PIL import Image, ExifTags
8
+
9
+ IMAGENET_MEAN = (0.485, 0.456, 0.406)
10
+ IMAGENET_STD = (0.229, 0.224, 0.225)
11
+
12
def build_transform(input_size):
    """Build the preprocessing pipeline applied to each image tile.

    The returned transform converts the image to RGB when it is in another
    mode, resizes it to an ``input_size`` x ``input_size`` square using
    bicubic interpolation, converts it to a tensor, and normalises it with
    ``IMAGENET_MEAN`` / ``IMAGENET_STD``.

    Args:
        input_size: Side length, in pixels, of the square output.

    Returns:
        A ``T.Compose`` object chaining the four steps above.
    """
    def _ensure_rgb(img):
        # Images already in RGB are passed through untouched.
        return img if img.mode == 'RGB' else img.convert('RGB')

    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)
21
+
22
def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
    """Pick the grid from ``target_ratios`` whose shape best matches an image.

    Args:
        aspect_ratio: Width / height of the source image.
        target_ratios: Iterable of ``(a, b)`` pairs; each is scored by how
            close ``a / b`` is to ``aspect_ratio``.
        width: Source image width; used only to compute the pixel area.
        height: Source image height; used only to compute the pixel area.
        image_size: Side length of one square tile.

    Returns:
        The entry of ``target_ratios`` with the smallest absolute difference
        to ``aspect_ratio``, or ``(1, 1)`` when ``target_ratios`` is empty.
        On an exact tie a later entry replaces the current choice only when
        the image area exceeds half the area that entry's tiles would cover.
    """
    chosen = (1, 1)
    smallest_gap = float('inf')
    pixel_area = width * height

    for candidate in target_ratios:
        gap = abs(aspect_ratio - candidate[0] / candidate[1])
        if gap < smallest_gap:
            # Strictly better match: always take it.
            smallest_gap, chosen = gap, candidate
            continue
        # Exact tie: switch only if the image is large enough to justify
        # this candidate's tile count.
        if gap == smallest_gap and pixel_area > 0.5 * image_size * image_size * candidate[0] * candidate[1]:
            chosen = candidate

    return chosen
36
+
37
def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
    """Resize an image onto a grid of square tiles and cut it into those tiles.

    Args:
        image: Source image; must expose ``size``, ``resize`` and ``crop``.
        min_num: Minimum number of tiles a candidate grid may contain.
        max_num: Maximum number of tiles a candidate grid may contain.
        image_size: Side length, in pixels, of each square tile.
        use_thumbnail: When true and more than one tile was produced, append
            the whole image resized to a single tile.

    Returns:
        A list of cropped tiles in row-major order, optionally followed by
        the thumbnail.
    """
    src_width, src_height = image.size
    src_ratio = src_width / src_height

    # Enumerate every (columns, rows) grid whose tile count lies within
    # [min_num, max_num], then order the candidates by tile count.
    grids = set(
        (cols, rows)
        for n in range(min_num, max_num + 1)
        for cols in range(1, n + 1)
        for rows in range(1, n + 1)
        if cols * rows <= max_num and cols * rows >= min_num
    )
    grids = sorted(grids, key=lambda grid: grid[0] * grid[1])

    # Choose the grid whose shape is closest to the source image.
    best_grid = find_closest_aspect_ratio(
        src_ratio, grids, src_width, src_height, image_size)

    target_width = image_size * best_grid[0]
    target_height = image_size * best_grid[1]
    blocks = best_grid[0] * best_grid[1]

    # Stretch the image to exactly fill the grid, then slice it tile by tile.
    resized_img = image.resize((target_width, target_height))
    tiles = []
    for index in range(blocks):
        tiles_per_row = target_width // image_size
        col = index % tiles_per_row
        row = index // tiles_per_row
        box = (
            col * image_size,
            row * image_size,
            (col + 1) * image_size,
            (row + 1) * image_size,
        )
        tiles.append(resized_img.crop(box))
    assert len(tiles) == blocks

    if use_thumbnail and len(tiles) != 1:
        # Add a downscaled view of the full image alongside the tiles.
        tiles.append(image.resize((image_size, image_size)))
    return tiles
74
+
75
def correct_image_orientation(image_path):
    """Open an image and turn it upright according to its EXIF orientation.

    Args:
        image_path: Path (or file object) accepted by ``Image.open``.

    Returns:
        The opened image, transposed according to its EXIF Orientation tag
        when one is present. If the EXIF data cannot be processed, the image
        is returned as opened.
    """
    # Imported locally because the module-level PIL import only brings in
    # Image and ExifTags.
    from PIL import ImageOps

    # Open the image.
    image = Image.open(image_path)

    # Apply the EXIF orientation, if any. exif_transpose is the public API:
    # unlike the private _getexif() it is not tied to specific image classes,
    # and it also covers the mirrored orientations (2, 4, 5, 7) that the
    # previous hand-written 3/6/8 handling ignored.
    try:
        image = ImageOps.exif_transpose(image)
    except Exception as e:
        # Best effort: a broken EXIF block must not prevent loading the image.
        print("Không thể xử lý Exif:", e)

    return image
97
+
98
def load_image(image_file, input_size=448, max_num=12):
    """Load an image file and turn it into a stacked tensor of tiles.

    The image is opened with its orientation corrected, converted to RGB,
    split into tiles by ``dynamic_preprocess`` (thumbnail enabled), and each
    tile is passed through the transform from ``build_transform``.

    Args:
        image_file: Path of the image to load.
        input_size: Side length, in pixels, of each square tile.
        max_num: Maximum number of tiles passed on to ``dynamic_preprocess``.

    Returns:
        The transformed tiles stacked with ``torch.stack``, or ``None`` when
        any step raises; the error is printed rather than propagated.
    """
    try:
        print("Loading image:", image_file)
        rgb_image = correct_image_orientation(image_file).convert('RGB')
        print("Image size:", rgb_image.size)
        to_tensor = build_transform(input_size=input_size)
        tiles = dynamic_preprocess(rgb_image, image_size=input_size, use_thumbnail=True, max_num=max_num)
        print("Number of images:", len(tiles))
        stacked = torch.stack([to_tensor(tile) for tile in tiles])
        print("Image loaded successfully.")
        return stacked
    except Exception as e:
        # Deliberate best effort: report the failure and signal it with None.
        print("Error loading image:", e)
        return None
utils.py CHANGED
@@ -12,7 +12,7 @@ from constants import LOGDIR
12
  import datetime
13
 
14
  server_error_msg = (
15
- "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
16
  )
17
  moderation_msg = (
18
  "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
 
12
  import datetime
13
 
14
  server_error_msg = (
15
+ "**COULD NOT LOAD MODEL. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
16
  )
17
  moderation_msg = (
18
  "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."