SeungHyuk-Kim committed on
Commit
91fed70
·
verified ·
1 Parent(s): 23bc0a7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from transformers import AutoModelForCausalLM, AutoProcessor
from PIL import Image
import torch

# Vision-language model served by this Space.
model_id = "skt/A.X-4.0-VL-Light"

# Fall back to CPU when no CUDA device is available so the app can still
# start (inference will be slow, but it will not crash at import time the
# way an unconditional .to("cuda") does on a CPU-only host).
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,  # model ships custom modeling code on the Hub
    torch_dtype=torch.bfloat16,
).to(device)

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
14
def ask_image_question(image, text):
    """Answer a free-form question about an uploaded image.

    Args:
        image: PIL image supplied by the Gradio UI (``gr.Image(type="pil")``).
        text: the user's question about the image.

    Returns:
        The decoded model output as a single string.
        NOTE(review): this decodes the full generated sequence; depending on
        the model's custom processor the prompt may be echoed in the answer —
        verify whether the output needs to be sliced past the input tokens.
    """
    messages = [
        {
            "role": "user",
            "content": [{"type": "image"}, {"type": "text", "text": text}],
        }
    ]
    # Move the input tensors to wherever the model actually lives instead of
    # assuming a CUDA device is present (fixes a crash on CPU-only hosts).
    inputs = processor(
        images=[image], conversations=[messages], return_tensors="pt"
    ).to(model.device)

    # Sampling configuration for generation; do_sample=True makes top_p /
    # temperature / top_k effective.
    generation_kwargs = {
        "max_new_tokens": 256,
        "top_p": 0.8,
        "temperature": 0.5,
        "top_k": 20,
        "repetition_penalty": 1.05,
        "do_sample": True,
    }

    generated_ids = model.generate(**inputs, **generation_kwargs)
    output = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return output
30
+
31
# Wire the UI: an image upload plus a question textbox, answered as plain text.
demo = gr.Interface(
    fn=ask_image_question,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="질문")],
    outputs="text",
)
demo.launch()