runninglsy committed on
Commit
70dccca
·
1 Parent(s): 42375bc
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.jpg filter=lfs diff=lfs merge=lfs -text
38
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
  title: Ovis2 4B
3
- emoji: 🐢
4
- colorFrom: purple
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.16.0
8
  app_file: app.py
9
  pinned: false
 
10
  short_description: Ovis2-4B
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Ovis2 4B
3
+ emoji: 🦫
4
+ colorFrom: yellow
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.1.0
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
  short_description: Ovis2-4B
12
  ---
13
 
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
import os

# Install flash-attn at startup, before anything that imports it is loaded.
# The variable is merged into the inherited environment: passing a bare dict
# as `env` would replace the whole environment (PATH, HOME, proxy settings),
# which can leave the shell unable to locate or run `pip`.
# NOTE(review): FLASH_ATTENTION_SKIP_CUDA_BUILD presumably makes the package
# skip compiling its CUDA kernels locally -- confirm against flash-attn docs.
subprocess.run(
    'pip install flash-attn==2.7.0.post2 --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
3
+
4
+ import spaces
5
+ import os
6
+ import re
7
+ import logging
8
+ from typing import List, Any
9
+ from threading import Thread
10
+
11
+ import torch
12
+ import gradio as gr
13
+ from transformers import AutoModelForCausalLM, TextIteratorStreamer
14
+
15
# Hugging Face repo id of the checkpoint served by this demo.
model_name = 'AIDC-AI/Ovis2-4B'
# When True, generation runs on a background thread while the request
# handler consumes the streamer.
use_thread = True

# Load the checkpoint in bfloat16, then move it to the GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    trust_remote_code=True,
)
model = model.to(device='cuda')

# Tokenizers are provided by the model object itself.
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)

# Marker inserted into a user message where the image belongs.
image_placeholder = '<image>'
# Directory containing this file; used to locate bundled resources/examples.
cur_dir = os.path.dirname(os.path.abspath(__file__))

# Quieten httpx request logging, then configure INFO-level logging for the app.
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
32
+
33
def initialize_gen_kwargs():
    """Return a fresh dict of keyword arguments for ``model.generate``.

    Sampling is disabled (``do_sample=False``) and the sampling parameters
    are explicitly set to ``None``. The end-of-sequence id is read from the
    model's generation config and the padding id from the text tokenizer.
    """
    gen_kwargs = dict(
        max_new_tokens=1536,
        do_sample=False,
        top_p=None,
        top_k=None,
        temperature=None,
        repetition_penalty=1.05,
    )
    gen_kwargs["eos_token_id"] = model.generation_config.eos_token_id
    gen_kwargs["pad_token_id"] = text_tokenizer.pad_token_id
    gen_kwargs["use_cache"] = True
    return gen_kwargs
45
+
46
def submit_chat(chatbot, text_input):
    """Append the user's message as a new pending turn and clear the prompt.

    Args:
        chatbot: Chat history as a list of ``[user, assistant]`` pairs. It is
            extended in place; ``None`` is treated as an empty history.
        text_input: The text the user just submitted.

    Returns:
        A ``(chatbot, '')`` tuple: the updated history and an empty string
        that resets the prompt box.
    """
    if chatbot is None:
        chatbot = []
    # The turn is stored as a list, not a tuple, because ovis_chat fills in
    # the assistant slot by assigning to chatbot[-1][1].
    chatbot.append([text_input, ''])
    return chatbot, ''
50
+
51
@spaces.GPU
def ovis_chat(chatbot: List[List[str]], image_input: Any):
    """Generate the reply to the latest turn, streaming it into ``chatbot``.

    This is a generator: after each decoded text chunk it writes the
    accumulated response into ``chatbot[-1][1]`` and yields the history.

    Args:
        chatbot: Chat history as ``[user, assistant]`` pairs; the last pair
            is the pending turn whose assistant slot gets filled in.
        image_input: Image forwarded to ``prepare_inputs`` (``None`` for a
            text-only request).

    Yields:
        The ``chatbot`` history with the partial response so far.
    """
    _, model_inputs = prepare_inputs(chatbot, image_input)
    gen_kwargs = initialize_gen_kwargs()

    # A streamer is a stateful queue. Use one per request so that concurrent
    # requests never consume each other's tokens.
    request_streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)

    def generate_func():
        # inference_mode is thread-local, so it has to be entered in the
        # thread that actually runs generate(), not in the caller. This also
        # avoids leaving the mode active across this generator's yields.
        with torch.inference_mode():
            model.generate(**model_inputs, **gen_kwargs, streamer=request_streamer)

    thread = None
    if use_thread:
        thread = Thread(target=generate_func)
        thread.start()
    else:
        # Runs generation to completion first; the streamer buffers the text.
        generate_func()

    response = ""
    for new_text in request_streamer:
        response += new_text
        chatbot[-1][1] = response
        yield chatbot

    if thread is not None:
        thread.join()

    log_conversation(chatbot)
75
+
76
+
77
def prepare_inputs(chatbot: List[List[str]], image_input: Any):
    """Build the conversation and the ``model.generate`` inputs for a request.

    Args:
        chatbot: Chat history as ``[user, assistant]`` pairs. The last pair
            is the pending turn; only its user text is used.
        image_input: Image handed to ``model.preprocess_inputs``, or ``None``
            for a text-only request.

    Returns:
        A ``(conversations, model_inputs)`` tuple. ``conversations`` is the
        list of ``{"from": ..., "value": ...}`` messages sent to the model
        and ``model_inputs`` is the keyword dict for ``model.generate``.
    """
    # No system message is prepended; the conversation starts with the
    # first user turn.
    conversations = []

    # User-typed placeholders are removed from every query, not just the
    # latest one. Otherwise a literal placeholder typed in an earlier turn
    # would re-enter the prompt from history and outnumber the one image.
    for query, response in chatbot[:-1]:
        conversations.extend([
            {"from": "human", "value": query.replace(image_placeholder, '')},
            {"from": "gpt", "value": response},
        ])

    last_query = chatbot[-1][0].replace(image_placeholder, '')
    conversations.append({"from": "human", "value": last_query})

    # With an image present, attach the single placeholder to the first
    # user message of the conversation.
    if image_input is not None:
        for conv in conversations:
            if conv["from"] == "human":
                conv["value"] = f'{image_placeholder}\n{conv["value"]}'
                break

    logger.info(conversations)

    prompt, input_ids, pixel_values = model.preprocess_inputs(conversations, [image_input], max_partition=16)
    attention_mask = torch.ne(input_ids, text_tokenizer.pad_token_id)

    # unsqueeze(0) adds the leading batch dimension for a single request.
    model_inputs = {
        "inputs": input_ids.unsqueeze(0).to(device=model.device),
        "attention_mask": attention_mask.unsqueeze(0).to(device=model.device),
        "pixel_values": [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)] if image_input is not None else [None]
    }

    return conversations, model_inputs
111
+
112
def log_conversation(chatbot):
    """Log every turn of ``chatbot`` between start and end markers.

    Args:
        chatbot: Iterable of ``(request, answer)`` pairs; turns are numbered
            from 1.
    """
    logger.info("[OVIS_CONV_START]")
    # The turns go through the same logger as the markers, so all three stay
    # in order on one stream; a plain loop replaces the comprehension that
    # was used only for its side effects.
    for i, (request, answer) in enumerate(chatbot, 1):
        logger.info('Q%d:\n %s\nA%d:\n %s', i, request, i, answer)
    logger.info("[OVIS_CONV_END]")
116
+
117
def clear_chat():
    """Return reset values for the chat history, the image, and the prompt."""
    empty_history = []
    no_image = None
    blank_prompt = ""
    return empty_history, no_image, blank_prompt
119
+
120
# Read the bundled SVG logo so it can be inlined into the page header.
logo_path = f"{cur_dir}/resource/logo.svg"
with open(logo_path, "r", encoding="utf-8") as svg_file:
    svg_content = svg_file.read()

# Size the logo to match the title text by injecting height/style attributes
# into the opening <svg ...> tag.
font_size = "2.5em"
svg_content = re.sub(r'(<svg[^>]*)(>)', rf'\1 height="{font_size}" style="vertical-align: middle; display: inline-block;"\2', svg_content)

# Header markup: logo plus model short name, followed by project links.
html = f"""
<p align="center" style="font-size: {font_size}; line-height: 1;">
<span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
<span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
</p>
<center><font size=3><b>Ovis</b> has been open-sourced on <a href='https://huggingface.co/{model_name}'>😊 Huggingface</a> and <a href='https://github.com/AIDC-AI/Ovis'>🌟 GitHub</a>. If you find Ovis useful, a like❤️ or a star🌟 would be appreciated.</font></center>
"""
131
+
132
# (left, right, display) triples for the LaTeX delimiters passed to the
# Chatbot component; only the inline \( ... \) form has display=False.
_delimiter_specs = [
    ("\\(", "\\)", False),
    ("\\begin{equation}", "\\end{equation}", True),
    ("\\begin{align}", "\\end{align}", True),
    ("\\begin{alignat}", "\\end{alignat}", True),
    ("\\begin{gather}", "\\end{gather}", True),
    ("\\begin{CD}", "\\end{CD}", True),
    ("\\[", "\\]", True),
]

latex_delimiters_set = [
    {"left": left, "right": right, "display": display}
    for left, right, display in _delimiter_specs
]
161
+
162
# The prompt box is created before the Blocks context so the Examples widget
# can reference it; it is placed in the layout later via .render().
text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)

with gr.Blocks(title=model_name.split('/')[-1], theme=gr.themes.Ocean()) as demo:
    gr.HTML(html)
    with gr.Row():
        # Left column: image upload and clickable examples.
        with gr.Column(scale=3):
            image_input = gr.Image(label="image", height=350, type="pil")
            example_rows = [
                [f"{cur_dir}/examples/ovis2_math2.png", "Find the area of the shaded region."],
                [f"{cur_dir}/examples/ovis2_figure2.png", "What is net profit margin as a percentage of total revenue?"],
                [f"{cur_dir}/examples/ovis2_table0.png", "Convert the table to markdown."],
                [f"{cur_dir}/examples/ovis2_ocr0.jpeg", "OCR:"],
            ]
            gr.Examples(examples=example_rows, inputs=[image_input, text_input])
        # Right column: conversation view, prompt box and action buttons.
        with gr.Column(scale=7):
            chatbot = gr.Chatbot(label="Ovis", layout="panel", height=600, show_copy_button=True, latex_delimiters=latex_delimiters_set)
            text_input.render()
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

    def _wire_chat(trigger):
        """Chain submit_chat then ovis_chat onto the given event trigger."""
        queued = trigger(submit_chat, [chatbot, text_input], [chatbot, text_input])
        return queued.then(ovis_chat, [chatbot, image_input], chatbot)

    # The Send button and pressing Enter in the prompt box do the same thing.
    send_click_event = _wire_chat(send_btn.click)
    submit_event = _wire_chat(text_input.submit)
    clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])

demo.launch()
examples/ovis2_caption0.jpg ADDED

Git LFS Details

  • SHA256: ddd7939e7b7be2813bd9e11ac21c016b05b10332e5e519b7500c10aca7f32c88
  • Pointer size: 131 Bytes
  • Size of remote file: 437 kB
examples/ovis2_figure0.png ADDED

Git LFS Details

  • SHA256: 80bebf1106831041eaa9baef86d12d443360d5f4e5dd37795d841658853b44fc
  • Pointer size: 132 Bytes
  • Size of remote file: 2.84 MB
examples/ovis2_figure1.png ADDED

Git LFS Details

  • SHA256: af401830ffa31eac748766c49cc678124f859aa5336c38c94b3586fda0e6240c
  • Pointer size: 131 Bytes
  • Size of remote file: 278 kB
examples/ovis2_figure2.png ADDED

Git LFS Details

  • SHA256: ec9e80cf2885022c8fd6120b9ecb5a11907c6af15a8e89bdc7e6f891ca618b1e
  • Pointer size: 131 Bytes
  • Size of remote file: 232 kB
examples/ovis2_math0.jpg ADDED

Git LFS Details

  • SHA256: e9feb598f783b0103888fa6db1dea23045e9245d8417895623f8408b783c062e
  • Pointer size: 129 Bytes
  • Size of remote file: 7.46 kB
examples/ovis2_math1.jpg ADDED

Git LFS Details

  • SHA256: d8a7dc778bae422f40e37ecd6e23e99a08be5d1c81b5d92530d4572bc6d8e2b4
  • Pointer size: 129 Bytes
  • Size of remote file: 6.39 kB
examples/ovis2_math2.png ADDED

Git LFS Details

  • SHA256: 6c58d5fb14f9be6f18b841e707e73dd750bae4a5a0c729ee668313dea43fbef4
  • Pointer size: 131 Bytes
  • Size of remote file: 128 kB
examples/ovis2_multi0.jpg ADDED

Git LFS Details

  • SHA256: 66f1f86d24b0f334f039165ebd1ec3e83cefcf7b8bea87e9ec2d42a09c1f84e5
  • Pointer size: 132 Bytes
  • Size of remote file: 3.41 MB
examples/ovis2_ocr0.jpeg ADDED

Git LFS Details

  • SHA256: f814f1c1c0899bde9e6469fd09d565df9420c4b5ee14e19be097296df19a19f7
  • Pointer size: 131 Bytes
  • Size of remote file: 145 kB
examples/ovis2_ocr1.jpg ADDED

Git LFS Details

  • SHA256: 0b6bf32049e611197aded3dfb7d63e83054412706e2cf24de31561566f4108d2
  • Pointer size: 131 Bytes
  • Size of remote file: 137 kB
examples/ovis2_table0.png ADDED

Git LFS Details

  • SHA256: f7b44366041949bdbd490fbf0485ddd9f94dc7f701d66a04411f98f2d63c402e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.13 MB
examples/ovsi2_know0.png ADDED

Git LFS Details

  • SHA256: ed7a95a6bd29ef5f9e3727f44cbb0c475a52d7d2a7d397214687b34d7f14812f
  • Pointer size: 131 Bytes
  • Size of remote file: 318 kB
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ numpy==1.25.0
2
+ torch==2.4.0
3
+ transformers==4.46.2
4
+ pillow==10.3.0
resource/logo.svg ADDED