0llheaven committed (verified)
Commit ddfe551 · 1 Parent(s): 0a05d96

Update app.py

Files changed (1): app.py (+25 -6)
app.py CHANGED

@@ -1,15 +1,16 @@
 import gradio as gr
 import torch
 from PIL import Image
-from transformers import MllamaForConditionalGeneration, AutoProcessor
+from transformers import AutoModelForImageTextToText, MllamaForConditionalGeneration, AutoProcessor
 from transformers import TextStreamer
 from torchvision.transforms import Resize
+from unsloth import FastVisionModel
 
 # Define the model and processor
 model_id = "0llheaven/Llama-3.2-11B-Vision-Radiology-mini"
 
 # device = "cuda" if torch.cuda.is_available() else "cpu"
-model = MllamaForConditionalGeneration.from_pretrained(
+model = AutoModelForImageTextToText.from_pretrained(
     model_id,
     # load_in_4bit=True,
     torch_dtype=torch.bfloat16,
@@ -20,6 +21,14 @@ model.gradient_checkpointing_enable()
 
 processor = AutoProcessor.from_pretrained(model_id)
 
+FastVisionModel.for_inference(model)
+print("กำลังโหลด tokenizer...")
+base_model, tokenizer = FastVisionModel.from_pretrained(
+    "unsloth/Llama-3.2-11B-Vision-Instruct",
+    # load_in_4bit = True,
+    use_gradient_checkpointing = "unsloth",
+)
+
 # Function to process the image and generate the description
 def generate_description(image: Image.Image, instruction: str):
     image = image.convert("RGB")
@@ -34,8 +43,9 @@ def generate_description(image: Image.Image, instruction: str):
         ]}
     ]
 
-    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(
+    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
+    # input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
@@ -43,8 +53,17 @@ def generate_description(image: Image.Image, instruction: str):
     ).to(model.device)
 
     # Generate the output from the model
-    output = model.generate(**inputs, max_new_tokens=256)
-    return processor.decode(output[0])
+    # output = model.generate(**inputs, max_new_tokens=256)
+    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
+    outputs = model.generate(
+        **inputs,
+        streamer=text_streamer,
+        max_new_tokens=256,
+        use_cache=True,
+        temperature=1.5,
+        min_p=0.1
+    )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
 
 # Define Gradio interface
 interface = gr.Interface(
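
The Thai print statement added in the second hunk, print("กำลังโหลด tokenizer..."), translates to "Loading tokenizer...". The new sampling settings (temperature=1.5, min_p=0.1) appear to follow unsloth's vision example notebooks. As a reading aid, below is a minimal sketch of how the updated generate_description could be exercised; the test image path, instruction text, and the full gr.Interface wiring are illustrative assumptions, since the diff shows only the first line of the interface definition.

# Minimal usage sketch -- assumed to run inside app.py after
# generate_description is defined; paths and labels are placeholders.
from PIL import Image

# Smoke test of the streaming path: TextStreamer echoes tokens to stdout
# as they are generated, and the fully decoded string is returned as well.
test_image = Image.open("example_xray.png")  # placeholder path, not from the commit
print(generate_description(test_image, "Describe the findings in this radiograph."))

# One plausible completion of the gr.Interface(...) call in the trailing context:
interface = gr.Interface(
    fn=generate_description,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Instruction")],
    outputs=gr.Textbox(label="Generated description"),
)
interface.launch()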