"""Gradio demo that streams answers from the R1-OneVision vision-language model."""

import gradio as gr
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
from threading import Thread
from qwen_vl_utils import process_vision_info
import torch
import time

local_path = "Fancy-MLLM/R1-OneVision-7B"

# Pixel budget attached to every uploaded image entry in the chat message.
MIN_PIXELS = 1003520
MAX_PIXELS = 12845056

# Invisible LEFT-TO-RIGHT MARK (U+200E) that the original code prepended to
# every streamed update. NOTE(review): presumably a Markdown rendering
# workaround -- confirm before removing.
_OUTPUT_PREFIX = "\u200e"

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    local_path,
    torch_dtype="auto",
    device_map="cpu",
)
processor = AutoProcessor.from_pretrained(local_path)


def generate_output(image, text, button_click=None):
    """Stream the model's answer to ``text`` about ``image``.

    This is a generator: each yielded value is the full text generated so
    far (prefixed with ``_OUTPUT_PREFIX``), so the bound output component
    is refreshed in place while tokens arrive.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
            When ``None`` the request is sent as a text-only message.
        text: The user's question.
        button_click: Unused. Kept (now optional) for backward
            compatibility; the click handler only supplies ``image`` and
            ``text``, so a required third parameter made every call fail.

    Yields:
        The accumulated generated text, or ``"Error occurred: ..."`` if
        streaming or generation fails.
    """
    # Build the chat message; the image entry is only added when present so
    # that a missing upload does not send ``None`` into vision preprocessing.
    content = []
    if image is not None:
        content.append(
            {
                "type": "image",
                "image": image,
                "min_pixels": MIN_PIXELS,
                "max_pixels": MAX_PIXELS,
            }
        )
    content.append({"type": "text", "text": text})
    messages = [{"role": "user", "content": content}]

    # Prepare inputs for the model.
    text_input = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text_input],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)

    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=4096,
        top_p=0.001,
        top_k=1,
        temperature=0.01,
        repetition_penalty=1.0,
    )

    worker_errors = []

    def _run_generation():
        # Exceptions raised in this worker thread never reach the consumer
        # loop below; without closing the streamer here the loop would wait
        # forever for a stop signal that never arrives.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:
            worker_errors.append(exc)
            streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()

    generated_text = ""
    try:
        for new_text in streamer:
            generated_text += new_text
            yield f"{_OUTPUT_PREFIX}{generated_text}"
    except Exception as e:
        print(f"Error: {e}")
        yield f"Error occurred: {str(e)}"
        return

    thread.join()
    if worker_errors:
        # Report a generation failure the same way as a streaming failure.
        print(f"Error: {worker_errors[0]}")
        yield f"Error occurred: {str(worker_errors[0])}"


with gr.Blocks() as demo:
    gr.HTML("""
🦖 R1-OneVision Demo
""")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload")  # switched back to PIL handling
            input_text = gr.Textbox(label="Input your question")
            with gr.Row():
                clear_btn = gr.ClearButton([input_image, input_text])
                submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            output_text = gr.Markdown(elem_id="qwen-md", container=True)

    submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)

demo.launch(share=True)

# ---------------------------------------------------------------------------
# Disabled alternative UI (custom CSS and explicit LaTeX delimiters), kept
# for reference exactly as it appeared in the original file.
# ---------------------------------------------------------------------------
# Css = """
# #output-markdown {
#     overflow-y: auto;
#     white-space: pre-wrap;
#     word-wrap: break-word;
# }
# #output-markdown .math {
#     overflow-x: auto;
#     max-width: 100%;
# }
# .markdown-text {
#     white-space: pre-wrap;
#     word-wrap: break-word;
# }
# #qwen-md .katex-display { display: inline; }
# #qwen-md .katex-display>.katex { display: inline; }
# #qwen-md .katex-display>.katex>.katex-html { display: inline; }
# """
# # UI components
# with gr.Blocks(css=Css) as demo:
#     gr.HTML("""
# 🦖 R1-OneVision Demo
# """)
#     with gr.Row():
#         with gr.Column():
#             input_image = gr.Image(type="pil", label="Upload")
#             input_text = gr.Textbox(label="input your question")
#             with gr.Row():
#                 with gr.Column():
#                     clear_btn = gr.ClearButton([input_image, input_text])
#                 with gr.Column():
#                     submit_btn = gr.Button("Submit", variant="primary")
#         with gr.Column():
#             output_text = gr.Markdown(
#                 label="Generated Response",
#                 max_height="80vh",
#                 min_height="50vh",
#                 container=True,
#                 latex_delimiters=[{
#                     "left": "\\(",
#                     "right": "\\)",
#                     "display": True
#                 }, {
#                     "left": "\\begin\{equation\}",
#                     "right": "\\end\{equation\}",
#                     "display": True
#                 }, {
#                     "left": "\\begin\{align\}",
#                     "right": "\\end\{align\}",
#                     "display": True
#                 }, {
#                     "left": "\\begin\{alignat\}",
#                     "right": "\\end\{alignat\}",
#                     "display": True
#                 }, {
#                     "left": "\\begin\{gather\}",
#                     "right": "\\end\{gather\}",
#                     "display": True
#                 }, {
#                     "left": "\\begin\{CD\}",
#                     "right": "\\end\{CD\}",
#                     "display": True
#                 }, {
#                     "left": "\\[",
#                     "right": "\\]",
#                     "display": True
#                 }],
#                 elem_id="qwen-md")
#     submit_btn.click(
#         fn=generate_output,
#         inputs=[input_image, input_text],
#         outputs=output_text,
#         queue=True
#     )
# demo.launch(share=True)