Spaces:
Runtime error
Runtime error
File size: 3,093 Bytes
f6a98e4 a997f34 f6a98e4 d35379a 8f558df f6a98e4 36163a8 f6a98e4 36163a8 39761c3 36163a8 97313a7 723de5f 97313a7 723de5f 97313a7 723de5f f6a98e4 723de5f c462fef 36163a8 c462fef 36163a8 39761c3 36163a8 c462fef 36163a8 c462fef f6a98e4 723de5f ae0aef7 8f558df 723de5f f6a98e4 723de5f f6a98e4 723de5f 97313a7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import spaces
import os
import time
import torch
import gradio as gr
from threading import Thread
from PIL import Image
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
# Model and processor initialization
# NOTE(review): this runs at import time; QVQ-72B is a very large checkpoint,
# so process startup is slow and memory-heavy. device_map="auto" lets
# accelerate shard/place the weights, and torch_dtype="auto" keeps the
# checkpoint's native precision.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/QVQ-72B-Preview",
    torch_dtype="auto",
    device_map="auto"
)
# Processor bundles the tokenizer and the image preprocessor for this model.
processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
# Footer
# Raw HTML injected at the bottom of the Gradio page via gr.HTML(footer).
footer = """
<div style="text-align: center; margin-top: 20px;">
<p>Powered by QVQ-72B Model</p>
</div>
"""
# Vision model function
@spaces.GPU()
def process_image(image, text_input=None):
    """Answer a question about an image using the QVQ-72B model.

    Args:
        image: Input image — a ``PIL.Image.Image`` or a numpy array as
            delivered by the Gradio ``Image`` component; may be ``None``
            when the user submits without uploading.
        text_input: Optional question about the image. Falls back to a
            generic "describe this image" prompt when empty.

    Returns:
        str: The model's decoded response, or a human-readable error
        message (errors are returned, not raised, so the Gradio UI can
        display them in the output textbox).
    """
    try:
        # Gradio sends None when no image was uploaded; fail with a clear
        # message instead of an opaque PIL error.
        if image is None:
            return "Error processing image: no image provided."
        # Gradio typically delivers a numpy array; normalize to RGB PIL.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image).convert("RGB")
        # Fall back to a generic prompt when no question was asked.
        if not text_input:
            text_input = "Please describe this image in detail."
        # Chat-format messages expected by the Qwen2-VL processor.
        messages = [
            {
                "role": "system",
                "content": [
                    {"type": "text", "text": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."}
                ],
            },
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": text_input}
                ],
            }
        ]
        # Render the chat template to a prompt string (generation prompt
        # appended so the model continues as the assistant).
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        # Extract the vision inputs referenced by the messages.
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        # Use the model's own device rather than hard-coding "cuda" —
        # the model is loaded with device_map="auto".
        inputs = inputs.to(model.device)
        # Generate response
        generated_ids = model.generate(**inputs, max_new_tokens=8192)
        # Strip the prompt tokens so only newly generated text is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        output_text = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )[0]
        return output_text
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing
        # the Gradio event handler.
        return f"Error processing image: {str(e)}"
# CSS styling
css = """
footer {
visibility: hidden;
}
"""
# Gradio interface
# One column of rows: image in, optional question, submit button, text out.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
    with gr.Row():
        image_input = gr.Image(label="Input Image")
    with gr.Row():
        question_box = gr.Textbox(label="Question (Optional)")
    with gr.Row():
        run_button = gr.Button(value="Submit")
    with gr.Row():
        answer_box = gr.Textbox(label="Response")

    # Wire the button to the inference function.
    run_button.click(
        fn=process_image,
        inputs=[image_input, question_box],
        outputs=[answer_box],
    )
    gr.HTML(footer)

# Launch the app
demo.launch(debug=True)