# Spaces: Running on Zero
import gradio as gr | |
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor | |
from qwen_vl_utils import process_vision_info | |
import torch | |
from PIL import Image | |
# Path / identifier of the model to load.
local_path = "Fancy-MLLM/R1-OneVision-7B"

# Load the model and its processor from the same location.
# NOTE(review): device_map is pinned to "cpu" here — confirm this is intended
# for the deployment target.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    local_path,
    torch_dtype="auto",
    device_map="cpu",
)
processor = AutoProcessor.from_pretrained(local_path)
# Process the inputs and generate the model's answer.
def generate_output(image, text):
    """Answer a question about an uploaded image using the loaded model.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded. It is
            placed unchanged into the image entry of the chat message.
        text: The user's question, sent as the text entry of the same message.

    Returns:
        The decoded text of the model's reply (prompt excluded), or the string
        ``"Error: No image uploaded!"`` when ``image`` is ``None``.
    """
    if image is None:
        return "Error: No image uploaded!"

    # Build a single-turn user message holding the image and the question.
    # NOTE(review): min_pixels / max_pixels are presumably read by
    # process_vision_info to bound the resized image — confirm.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image, 'min_pixels': 1003520, 'max_pixels': 12845056},
                {"type": "text", "text": text},
            ],
        }
    ]

    # Render the chat prompt and collect the vision inputs for the processor.
    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text_input],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)  # follow whichever device the model is on

    # Run generation synchronously to avoid threading issues.
    output_tokens = model.generate(
        **inputs,
        max_new_tokens=4096,
        top_p=0.001,
        top_k=1,
        temperature=0.01,
        repetition_penalty=1.0,
    )

    # generate() returns the prompt tokens followed by the newly generated
    # ones. Drop the prompt part of each sequence so the reply does not echo
    # the chat template and the user's own question.
    completion_tokens = [
        full_ids[len(prompt_ids):]
        for prompt_ids, full_ids in zip(inputs.input_ids, output_tokens)
    ]

    # Decode only the completion; the batch holds a single sequence.
    generated_text = processor.batch_decode(completion_tokens, skip_special_tokens=True)[0]
    return generated_text
# UI components: image + question inputs on the left, model answer on the right.
with gr.Blocks() as demo:
    # Page header (the <font> tag is closed so the markup is well-formed).
    gr.HTML("""<center><font size=8>🦖 R1-OneVision Demo</font></center>""")

    with gr.Row():
        with gr.Column():
            # The image is delivered to the callback as a PIL image.
            input_image = gr.Image(type="pil", label="Upload")
            input_text = gr.Textbox(label="Input your question")
            with gr.Row():
                clear_btn = gr.ClearButton([input_image, input_text])
                submit_btn = gr.Button("Submit", variant="primary")

        with gr.Column():
            output_text = gr.Markdown(elem_id="qwen-md", container=True)

    # Bind the submit button to the generation function (no queue=True).
    submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)

demo.launch(share=True)