Spaces:
Sleeping
Sleeping
File size: 2,315 Bytes
8e76f41 bd40713 8e76f41 ae54346 6f2dfe8 af95610 f09701f b393445 f09701f ea9cb5f 47ba7f8 bd40713 f09701f bd40713 f09701f 8e76f41 7b3b2c9 8e76f41 af95610 8e76f41 af95610 8e76f41 af95610 f09701f 8e76f41 f09701f 8e76f41 f09701f f65c36e 8e76f41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
from PIL import Image
import spaces
import gradio as gr
# Base vision-language checkpoint loaded with from_pretrained.
MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"
# Hub repo used both for load_adapter and for the processor.
MODEL_FINETUNE_ID = "davidr99/qwen2.5-7b-instruct-blackjack"

# Sample screenshots offered through gr.Examples; all live under examples/.
EXAMPLES = [
    f"examples/{file_name}"
    for file_name in (
        "black_jack_screenshot_1737088587.png",
        "black_jack_screenshot_1737088629.png",
        "black_jack_screenshot_1737088648.png",
        "Screenshot 2024-12-06 220410.png",
    )
]
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
# The processor comes from the fine-tune repo and does not depend on the model
# object, so it can be created first.
processor = AutoProcessor.from_pretrained(MODEL_FINETUNE_ID)

# Load the base checkpoint (dtype selection is delegated via torch_dtype="auto"),
# move it to the GPU, then attach the fine-tuned adapter on top of it.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
)
model = model.to("cuda")
model.load_adapter(MODEL_FINETUNE_ID)
@spaces.GPU(duration=30)
def blackjack_ai(image, question):
    """Ask the fine-tuned vision-language model about a blackjack screenshot.

    Builds a two-turn chat (fixed system prompt plus a user turn holding the
    image and the question), runs generation on the GPU and decodes only the
    newly generated tokens.

    Args:
        image: Image reference placed in the user message and resolved by
            ``process_vision_info``. The UI in this file supplies a file path.
        question: Instruction text sent alongside the image.

    Returns:
        The list produced by ``processor.batch_decode``: one decoded string
        per prompt in the batch (a single prompt is sent here).

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Submit can be pressed before an image is selected; report that in
        # the UI instead of failing later inside vision preprocessing.
        raise gr.Error("Please provide a blackjack screenshot before submitting.")

    messages = [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are a blackjack player. Extract the image into json information.",
                }
            ],
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": question},
            ],
        },
    ]
    print(messages)

    # Preparation for inference: render the chat template to a prompt string
    # and collect the vision inputs referenced by the messages.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to("cuda")

    # Inference: generation of the output.
    generated_ids = model.generate(**inputs, max_new_tokens=128)

    # Drop the leading prompt-length slice of each output sequence so that
    # only the newly generated continuation is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return output_text
def _answer_as_text(image, question):
    """Run ``blackjack_ai`` and return its result as plain display text.

    ``blackjack_ai`` returns the list from ``batch_decode``. Handing that list
    straight to a text component shows its Python repr (``['...']``), so the
    decoded entries are joined into a single string first. A plain string
    result is passed through unchanged.
    """
    result = blackjack_ai(image, question)
    if isinstance(result, str):
        return result
    return "\n".join(result)


# Demo UI: an image upload and an editable prompt feed the model; the decoded
# answer is shown in a text area.
with gr.Blocks() as demo:
    # type="filepath" makes the handler receive a path on disk.
    image = gr.Image(type="filepath")
    question = gr.Textbox(value = "extract json from this image.")
    submit = gr.Button("Submit")
    output = gr.TextArea()
    # Clicking an example only fills the image input.
    examples = gr.Examples(examples=EXAMPLES, inputs=[image])
    submit.click(_answer_as_text, inputs=[image, question], outputs=[output])
demo.launch()
|