# Spaces: Running on Zero
import gradio as gr | |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer | |
from threading import Thread | |
from qwen_vl_utils import process_vision_info | |
import torch | |
import time | |
# Checkpoint identifier; the same value is handed to both from_pretrained
# calls below so the model and its processor always come from one source.
local_path = "Fancy-MLLM/R1-OneVision-7B"

# Loaded once at import time and shared by every request.
# device_map="cpu" places the weights on the CPU; torch_dtype="auto" leaves
# the dtype choice to the library/checkpoint rather than fixing it here.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    local_path,
    torch_dtype="auto",
    device_map="cpu",
)

# Processor used for the chat template, input tensors and stream decoding.
processor = AutoProcessor.from_pretrained(local_path)
def generate_output(image, text, button_click=None):
    """Stream the model's answer to a question, optionally about an image.

    Builds a single-turn user message, runs ``model.generate`` on a
    background thread and yields the accumulated response every time the
    streamer produces new text, so the caller can re-render partial output.

    Args:
        image: Image to ask about. When ``None`` (nothing uploaded) the
            image entry is left out and the prompt is text-only.
        text: The user's question.
        button_click: Unused by this function. Defaults to ``None`` so the
            function can be called with just ``(image, text)``; passing a
            third positional argument still works.

    Yields:
        str: The response generated so far, or an ``"Error occurred: ..."``
        message if reading from the streamer raises.
    """
    # Only include an image entry when an image was actually provided;
    # an entry whose image is None cannot be loaded by the vision helper.
    content = []
    if image is not None:
        content.append(
            {
                "type": "image",
                "image": image,
                # 1280 * 28 * 28 and 16384 * 28 * 28 respectively.
                "min_pixels": 1003520,
                "max_pixels": 12845056,
            }
        )
    content.append({"type": "text", "text": text})
    messages = [{"role": "user", "content": content}]

    # Render the chat prompt as text, then collect the vision inputs
    # referenced by the messages and tensorize everything together.
    text_input = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text_input],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)

    # The streamer hands decoded text from the generation thread to this
    # generator; the prompt and special tokens are skipped.
    streamer = TextIteratorStreamer(
        processor, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=4096,
        top_p=0.001,
        top_k=1,
        temperature=0.01,
        repetition_penalty=1.0,
    )

    # generate() blocks until completion, so it runs on its own thread while
    # this generator consumes the streamer.
    worker = Thread(target=model.generate, kwargs=generation_kwargs)
    worker.start()

    generated_text = ''
    try:
        for new_text in streamer:
            generated_text += new_text
            # NOTE(review): the literal below begins with an invisible
            # character carried over from the original (presumably a
            # left-to-right mark) -- confirm before removing it.
            yield f"‎{generated_text}"
    except Exception as e:
        print(f"Error: {e}")
        yield f"Error occurred: {str(e)}"
    else:
        # The stream ended normally, so generation is finishing; wait for
        # the worker instead of leaving the thread dangling.
        worker.join()
# Custom stylesheet handed to gr.Blocks(css=...).
# - "#output-markdown" / ".markdown-text": wrap long lines and allow
#   scrolling of overflowing content.
# - "#qwen-md .katex-display ...": force KaTeX display blocks inside the
#   element with id "qwen-md" to be laid out inline.
# NOTE(review): no component visible in this file uses the id
# "output-markdown" (the Markdown output uses elem_id="qwen-md") -- confirm
# whether those rules are still needed.
Css = """
#output-markdown {
overflow-y: auto;
white-space: pre-wrap;
word-wrap: break-word;
}
#output-markdown .math {
overflow-x: auto;
max-width: 100%;
}
.markdown-text {
white-space: pre-wrap;
word-wrap: break-word;
}
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""
# UI components
with gr.Blocks(css=Css) as demo:
    gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</center>""")
    with gr.Row():
        # Left column: inputs and action buttons.
        with gr.Column():
            # No trailing comma here: input_image must be the component
            # itself, not a 1-tuple wrapping it.
            input_image = gr.Image(type="pil", label="Upload")
            input_text = gr.Textbox(label="input your question")
            with gr.Row():
                with gr.Column():
                    clear_btn = gr.ClearButton([input_image, input_text])
                with gr.Column():
                    submit_btn = gr.Button("Submit", variant="primary")
        # Right column: streamed Markdown answer with LaTeX rendering.
        with gr.Column():
            # Backslashes are written as "\\" throughout so that no string
            # relies on an invalid escape such as "\{"; the runtime values
            # are unchanged (a backslash still precedes each brace).
            # NOTE(review): confirm that delimiters containing "\{" / "\}"
            # actually match "\begin{...}" blocks in the rendered output.
            output_text = gr.Markdown(
                label="Generated Response",
                max_height="80vh",
                min_height="50vh",
                container=True,
                latex_delimiters=[{
                    "left": "\\(",
                    "right": "\\)",
                    "display": True
                }, {
                    "left": "\\begin\\{equation\\}",
                    "right": "\\end\\{equation\\}",
                    "display": True
                }, {
                    "left": "\\begin\\{align\\}",
                    "right": "\\end\\{align\\}",
                    "display": True
                }, {
                    "left": "\\begin\\{alignat\\}",
                    "right": "\\end\\{alignat\\}",
                    "display": True
                }, {
                    "left": "\\begin\\{gather\\}",
                    "right": "\\end\\{gather\\}",
                    "display": True
                }, {
                    "left": "\\begin\\{CD\\}",
                    "right": "\\end\\{CD\\}",
                    "display": True
                }, {
                    "left": "\\[",
                    "right": "\\]",
                    "display": True
                }],
                elem_id="qwen-md")
    # Stream generate_output's yields into the Markdown component. Only the
    # image and the question are passed as inputs.
    submit_btn.click(
        fn=generate_output,
        inputs=[input_image, input_text],
        outputs=output_text,
        queue=True
    )
demo.launch(share=True)