davidr99 committed
Commit af95610 · 1 Parent(s): ee675b4

Update to use transformers
Files changed (2):
  1. app.py +35 -23
  2. requirements.txt +3 -1
app.py CHANGED
@@ -3,43 +3,55 @@ import spaces
 
 import gradio as gr
 
+MODEL_ID = "davidr99/qwen2-7b-instruct-blackjack"
+
 @spaces.GPU(duration=30)
 def blackjack_ai(image):
-    from unsloth import FastVisionModel
+    from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+    from qwen_vl_utils import process_vision_info
 
-    model, tokenizer = FastVisionModel.from_pretrained(
-        model_name = "davidr99/qwen2-7b-instruct-blackjack", # YOUR MODEL YOU USED FOR TRAINING
-        load_in_4bit = True, # Set to False for 16bit LoRA
-    )
-    FastVisionModel.for_inference(model) # Enable for inference!
+    model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="auto")
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
 
-    image = Image.fromarray(image.astype('uint8'), 'RGB')
-    instruction = "Write the LaTeX representation for this image."
+    instruction = "extract json from this image."
 
     messages = [
         {"role": "user", "content": [
-            {"type": "image"},
+            {"type": "image", "image": image},
             {"type": "text", "text": instruction}
         ]}
     ]
-    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
-    inputs = tokenizer(
-        image,
-        input_text,
-        add_special_tokens = False,
-        return_tensors = "pt",
-    ).to("cuda")
-
-    from transformers import TextStreamer
-    text_streamer = TextStreamer(tokenizer, skip_prompt = True)
-    _ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128,
-                       use_cache = True, temperature = 1.5, min_p = 0.1)
 
-    return text_streamer
+    print(messages)
+
+    # Preparation for inference
+    text = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    )
+    inputs = inputs.to("cuda")
+
+    # Inference: Generation of the output
+    generated_ids = model.generate(**inputs, max_new_tokens=128)
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+
+    return output_text
 
 with gr.Blocks() as demo:
 
-    image = gr.Image()
+    image = gr.Image(type="filepath")
     submit = gr.Button("Submit")
     output = gr.TextArea()
 
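The diff context ends before the Blocks components are connected to the handler, so the event hookup is not shown. Below is a minimal sketch of how the new pieces would plausibly be wired together, following the standard Gradio pattern; the submit.click(...) call and demo.launch() are assumptions, not part of this commit:

import gradio as gr

with gr.Blocks() as demo:
    image = gr.Image(type="filepath")
    submit = gr.Button("Submit")
    output = gr.TextArea()
    # Assumed wiring (not shown in the diff): pass the uploaded image path
    # to blackjack_ai and display the decoded model output in the text area.
    submit.click(fn=blackjack_ai, inputs=image, outputs=output)

demo.launch()

Note that blackjack_ai returns the list produced by processor.batch_decode, so the TextArea would render a one-element list; indexing [0] before returning would display plain text instead.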
requirements.txt CHANGED
@@ -1,5 +1,7 @@
 transformers
 gradio
-unsloth
 pillow
+qwen-vl-utils
+torchvision
+torch
 spaces
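The new qwen-vl-utils dependency supplies the process_vision_info helper imported in app.py: it walks the chat messages and resolves each "image" entry (a local path, URL, or PIL image) into inputs the processor accepts. A minimal standalone sketch, assuming a hypothetical local file cards.png:

from qwen_vl_utils import process_vision_info

messages = [
    {"role": "user", "content": [
        {"type": "image", "image": "cards.png"},  # hypothetical example file
        {"type": "text", "text": "extract json from this image."},
    ]}
]

# Returns (image_inputs, video_inputs); with no video entries in the
# messages, the second element is None.
image_inputs, video_inputs = process_vision_info(messages)
print(type(image_inputs[0]))  # a PIL image, ready to pass to AutoProcessor

torch and torchvision are listed explicitly because the model weights and the vision preprocessing stack depend on them at runtime.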