Spaces:
Sleeping
Sleeping
File size: 7,803 Bytes
84c45b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
import gradio as gr
from transformers import AutoModelForCausalLM,AutoProcessor,Qwen2VLForConditionalGeneration
from PIL import Image
import os
import tempfile
import torch
from pathlib import Path
import secrets
# Checkpoint shared by the vision-language model and its processor.
_QWEN_VL_CHECKPOINT = "Qwen/Qwen2-VL-7B-Instruct"

# Vision-language model used to transcribe the math content of images.
# dtype and device placement are both left on "auto".
model = Qwen2VLForConditionalGeneration.from_pretrained(
    _QWEN_VL_CHECKPOINT,
    torch_dtype="auto",
    device_map="auto",
)

# Processor for the same checkpoint (chat template, image/text encoding,
# and decoding of generated ids).
processor = AutoProcessor.from_pretrained(_QWEN_VL_CHECKPOINT)

# Chat history sent to the math model; process_image() clears it and
# get_math_response() rebuilds it.
math_messages = []
def process_image(image, shouldConvert=False):
    """Ask the Qwen2-VL model to describe the math content of ``image``.

    The image is saved as a temporary JPEG (its path is referenced from the
    chat message), the prompt is rendered through the processor's chat
    template, and the model's continuation is decoded.  As a side effect the
    global ``math_messages`` history is cleared.

    Args:
        image: A PIL image (needs ``width``, ``height``, ``save`` and, when
            ``shouldConvert`` is true, ``convert``).
        shouldConvert: When true, flatten the image onto a white background
            using its alpha channel before saving it as JPEG.

    Returns:
        The list produced by ``processor.batch_decode`` — one decoded string
        per prompt; exactly one prompt is sent here.
    """
    global math_messages
    math_messages = []  # reset when upload image

    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
        Path(tempfile.gettempdir()) / "gradio"
    )
    os.makedirs(uploaded_file_dir, exist_ok=True)
    name = f"tmp{secrets.token_hex(20)}.jpg"
    filename = os.path.join(uploaded_file_dir, name)

    if shouldConvert:
        # paste(mask=...) needs an alpha channel; converting first keeps this
        # working for inputs that are not already RGBA.
        rgba = image.convert("RGBA")
        new_img = Image.new('RGB', size=(rgba.width, rgba.height), color=(255, 255, 255))
        new_img.paste(rgba, (0, 0), mask=rgba)
        image = new_img

    try:
        image.save(filename)
        messages = [{
            'role': 'system',
            'content': [{'text': 'You are a helpful assistant.'}]
        }, {
            'role': 'user',
            'content': [
                {'image': f'file://{filename}'},
                {'text': 'Please describe the math-related content in this image, ensuring that any LaTeX formulas are correctly transcribed. Non-mathematical details do not need to be described.'}
            ]
        }]
        text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(
            text=[text_prompt],
            images=[image],
            padding=True,
            return_tensors="pt",
        )
        # The model is placed by device_map="auto"; the inputs must live on
        # the same device as the model before calling generate().
        inputs = inputs.to(model.device)
        output_ids = model.generate(**inputs, max_new_tokens=1024)
        # generate() returns prompt + continuation; keep only the new tokens.
        generated_ids = [
            sequence[len(prompt_ids):]
            for prompt_ids, sequence in zip(inputs.input_ids, output_ids)
        ]
        output_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )
    finally:
        # Always clean up the temporary file, even when generation fails.
        if os.path.exists(filename):
            os.remove(filename)
    return output_text
# Cache of (model, tokenizer) pairs keyed by checkpoint name, so the large
# math checkpoint is loaded once instead of on every question.
_MATH_MODEL_CACHE = {}


def _load_math_model(model_name="Qwen/Qwen2-Math-72B-Instruct"):
    """Return the cached ``(model, tokenizer)`` pair for ``model_name``, loading it on first use."""
    if model_name not in _MATH_MODEL_CACHE:
        from transformers import AutoModelForCausalLM, AutoTokenizer
        math_model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="auto",
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        _MATH_MODEL_CACHE[model_name] = (math_model, tokenizer)
    return _MATH_MODEL_CACHE[model_name]


def get_math_response(image_description, user_question):
    """Generate the math model's answer to ``user_question``.

    This is a generator: it yields the answer once, with every backslash
    doubled.  The global ``math_messages`` history is cut back to the system
    prompt, extended with the new user query, and — on success — with the
    assistant's answer.

    Args:
        image_description: Optional description of an image to prepend to the
            question.  May be a string, or a list/tuple of strings (as
            returned by ``process_image``), which is joined with newlines.
        user_question: The question typed by the user.

    Yields:
        The decoded answer with ``\\`` replaced by ``\\\\``.
    """
    global math_messages
    if not math_messages:
        math_messages.append({'role': 'system', 'content': 'You are a helpful math assistant.'})
    # Keep only the system prompt: every question starts a fresh exchange.
    math_messages = math_messages[:1]

    # A list would otherwise be interpolated as its repr ("['...']").
    if isinstance(image_description, (list, tuple)):
        image_description = "\n".join(str(part) for part in image_description)
    if image_description is not None:
        content = f'Image description: {image_description}\n\n'
    else:
        content = ''
    query = f"{content}User question: {user_question}"
    math_messages.append({'role': 'user', 'content': query})

    try:
        math_model, tokenizer = _load_math_model()
        text = tokenizer.apply_chat_template(
            math_messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # Follow the model's actual placement rather than assuming "cuda".
        model_inputs = tokenizer([text], return_tensors="pt").to(math_model.device)
        generated_ids = math_model.generate(
            **model_inputs,
            max_new_tokens=512,
        )
        # generate() returns prompt + continuation; keep only the new tokens.
        new_token_ids = [
            sequence[len(prompt_ids):]
            for prompt_ids, sequence in zip(model_inputs.input_ids, generated_ids)
        ]
        answer = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]
    except Exception:
        # No answer was produced: drop the unanswered user query again.
        math_messages.pop()
        raise

    print(f'query: {query}\nanswer: {answer}')
    math_messages.append({'role': 'assistant', 'content': answer})
    yield answer.replace("\\", "\\\\")
def math_chat_bot(image, sketchpad, question, state):
    """Answer ``question``, optionally using an image from the active tab.

    ``state["tab_index"]`` selects the image source: 0 uses the uploaded
    ``image``, 1 uses ``sketchpad["composite"]``.  When an image is available
    it is first described by ``process_image``; the description (or ``None``)
    and the question are then forwarded to ``get_math_response``, whose
    output is yielded through unchanged.
    """
    active_tab = state["tab_index"]
    source = None
    from_sketch = False

    if active_tab == 0:
        # Upload
        if image is not None:
            source = image
    elif active_tab == 1:
        # Sketch
        print(sketchpad)
        if sketchpad and sketchpad["composite"]:
            source = sketchpad["composite"]
            from_sketch = True

    if source is None:
        image_description = None
    elif from_sketch:
        image_description = process_image(source, True)
    else:
        image_description = process_image(source)

    yield from get_math_response(image_description, question)
# Stylesheet passed to gr.Blocks: sets the KaTeX display containers under the
# element with id "qwen-md" to `display: inline`.
css = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""
def tabs_select(e: gr.SelectData, _state):
    """Record the index of the newly selected tab in ``_state["tab_index"]``."""
    _state.update(tab_index=e.index)
# Create the Gradio interface
with gr.Blocks(css=css) as demo:
    gr.HTML("""\
<p align="center"><img src="https://modelscope.oss-cn-beijing.aliyuncs.com/resource/qwen.png" style="height: 60px"/><p>"""
            """<center><font size=8>📖 Qwen2-Math Demo</center>"""
            """\
<center><font size=3>This WebUI is based on Qwen2-VL for OCR and Qwen2-Math for mathematical reasoning. You can input either images or texts of mathematical or arithmetic problems.</center>"""
            )
    # Tracks which input tab is active; updated by tabs_select().
    state = gr.State({"tab_index": 0})
    with gr.Row():
        # Left column: image/sketch input, question box and buttons.
        with gr.Column():
            with gr.Tabs() as input_tabs:
                with gr.Tab("Upload"):
                    # No trailing comma here: input_image is the component
                    # itself, not a 1-tuple that has to be unpacked later.
                    input_image = gr.Image(type="pil", label="Upload")
                with gr.Tab("Sketch"):
                    input_sketchpad = gr.Sketchpad(type="pil", label="Sketch", layers=False)
            input_tabs.select(fn=tabs_select, inputs=[state])
            input_text = gr.Textbox(label="input your question")
            with gr.Row():
                with gr.Column():
                    clear_btn = gr.ClearButton(
                        [input_image, input_sketchpad, input_text])
                with gr.Column():
                    submit_btn = gr.Button("Submit", variant="primary")
        # Right column: rendered answer.
        with gr.Column():
            # The braces keep their leading backslash at runtime; it is
            # written as "\\{" / "\\}" because "\{" is an invalid escape
            # sequence in a normal string literal.
            output_md = gr.Markdown(label="answer",
                                    latex_delimiters=[{
                                        "left": "\\(",
                                        "right": "\\)",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{equation\\}",
                                        "right": "\\end\\{equation\\}",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{align\\}",
                                        "right": "\\end\\{align\\}",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{alignat\\}",
                                        "right": "\\end\\{alignat\\}",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{gather\\}",
                                        "right": "\\end\\{gather\\}",
                                        "display": True
                                    }, {
                                        "left": "\\begin\\{CD\\}",
                                        "right": "\\end\\{CD\\}",
                                        "display": True
                                    }, {
                                        "left": "\\[",
                                        "right": "\\]",
                                        "display": True
                                    }],
                                    elem_id="qwen-md")
        submit_btn.click(
            fn=math_chat_bot,
            inputs=[input_image, input_sketchpad, input_text, state],
            outputs=output_md)
demo.launch()