0llheaven committed
Commit e094577 · verified · 1 Parent(s): 4f80322

Update app.py

Files changed (1):
  1. app.py +58 -127

app.py CHANGED
@@ -1,129 +1,60 @@
- import spaces
- import os
- import sys
- import subprocess
-
- def install_packages():
-     subprocess.check_call([sys.executable, "-m", "pip", "install", "unsloth-zoo"])
-     subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-deps", "git+https://github.com/unslothai/unsloth.git"])
-
- try:
-     install_packages()
- except Exception as e:
-     print(f"Failed to install packages: {e}")
-
- import warnings
- import torch
-
- from transformers import TextStreamer
  import gradio as gr
- from huggingface_hub import login
  from PIL import Image
-
- warnings.filterwarnings('ignore')
-
- model = None
- tokenizer = None
-
- # @spaces.GPU
- def load_model():
-     global model
-     print("Loading model...")
-     try:
-         from transformers import AutoModelForVision2Seq
-         print("Loading fine-tuned model...")
-
-         device = "cuda" if torch.cuda.is_available() else "cpu"
-         model = AutoModelForVision2Seq.from_pretrained(
-             "0llheaven/Llama-3.2-11B-Vision-Radiology-mini",
-             # load_in_4bit=True,
-             device_map=device,
-             torch_dtype=torch.float16
-         )
-
-         print("Model loaded successfully!")
-         return True
-
-     except Exception as e:
-         print(f"Error loading model: {str(e)}")
-         import traceback
-         traceback.print_exc()
-         return False
-
- @spaces.GPU(duration=120)
- def process_image(image):
-     global model
-
-     # Load the tokenizer from the base model
-     from unsloth import FastVisionModel
-
-     FastVisionModel.for_inference(model)  # workaround for a torch issue
-
-     from transformers import AutoTokenizer
-     print("Loading tokenizer...")
-     base_model, tokenizer = FastVisionModel.from_pretrained(
-         "unsloth/Llama-3.2-11B-Vision-Instruct",
-         use_gradient_checkpointing="unsloth",
-         # device_map="auto"  # added here
-     )
-
-     print("\nIn process_image():")
-     print("Type of model:", type(model))
-     print("A. Type of tokenizer:", type(tokenizer))
-     if tokenizer is not None:
-         print("B. Available methods:", dir(tokenizer))
-
-     if image is None:
-         return "Please upload an image"
-
-     try:
-         if not isinstance(image, Image.Image):
-             image = Image.fromarray(image)
-
-         print("0. Image info:", type(image), image.size)  # debug: image info
-         instruction = "You are an expert radiographer. Describe accurately what you see in this image."
-         messages = [
-             {"role": "user", "content": [
-                 {"type": "image"},
-                 {"type": "text", "text": instruction}
-             ]}
-         ]
-
-         print("1. Messages:", messages)
-
-         print("2. Tokenizer type:", type(tokenizer))
-         input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
-         print("3. Chat template success:", input_text[:100])
-         inputs = tokenizer(
-             image,
-             input_text,
-             add_special_tokens=False,
-             return_tensors="pt",
-         ).to("cuda")
-         print("4. Tokenizer inputs:", inputs.keys())  # Debug 4
-
-         text_streamer = TextStreamer(tokenizer, skip_prompt=True)
-         outputs = model.generate(
-             **inputs,
-             streamer=text_streamer,
-             max_new_tokens=256,
-             use_cache=True,
-             temperature=1.5,
-             min_p=0.1
-         )
-
-         return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-
-     except Exception as e:
-         return f"An error occurred: {str(e)}"
-
- if load_model():
-     demo = gr.Interface(
-         fn=process_image,
-         inputs=gr.Image(type="pil", label="Upload Image"),
-         outputs=gr.Textbox(label="Generated Caption"),
-         title="Medical Vision Analysis"
-     )
-
- if __name__ == "__main__":
-     demo.launch()
  import gradio as gr
+ import torch
  from PIL import Image
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
+ from transformers import TextStreamer
+ from torchvision.transforms import Resize
+
+ # Define the model and processor
+ model_id = "0llheaven/Llama-3.2-11B-Vision-Radiology-mini"
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model = MllamaForConditionalGeneration.from_pretrained(
+     model_id,
+     load_in_4bit=True,
+     torch_dtype=torch.bfloat16,
+     device_map=device,
+ )
+
+ model.gradient_checkpointing_enable()
+
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ # Process the uploaded image and generate a description.
+ # Takes only the image: the Gradio interface below supplies a single
+ # input, and the instruction is fixed inside the function.
+ def generate_description(image: Image.Image):
+     image = image.convert("RGB")
+     # image = Resize((224, 224))(image)
+
+     # Build the chat message to pass to the model
+     instruction = "You are an expert radiographer. Describe accurately what you see in this image."
+     messages = [
+         {"role": "user", "content": [
+             {"type": "image"},
+             {"type": "text", "text": instruction}
+         ]}
+     ]
+
+     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+     inputs = processor(
+         image,
+         input_text,
+         add_special_tokens=False,
+         return_tensors="pt"
+     ).to(model.device)
+
+     # Generate the output from the model
+     output = model.generate(**inputs, max_new_tokens=256)
+     return processor.decode(output[0], skip_special_tokens=True)
+
+ # Define the Gradio interface
+ interface = gr.Interface(
+     fn=generate_description,
+     inputs=gr.Image(type="pil", label="Upload an Image"),
+     outputs=gr.Textbox(label="Generated Description"),
+     live=True,
+     title="Radiology Image Description Generator",
+     description="Upload an image to generate a description using a vision-language model."
+ )
+
+ # Launch the interface
+ interface.launch()
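
Once the app is running, the endpoint can also be exercised programmatically. Below is a minimal sketch (not part of the commit) using the gradio_client package, assuming the app is reachable at Gradio's default local URL and that "chest_xray.png" is a sample image on disk; both names are illustrative assumptions.

    from gradio_client import Client, handle_file

    # Assumes the app above is running locally on Gradio's default port.
    client = Client("http://127.0.0.1:7860/")

    # "/predict" is the default api_name for a single-function gr.Interface;
    # "chest_xray.png" is a hypothetical sample image path.
    result = client.predict(
        handle_file("chest_xray.png"),
        api_name="/predict",
    )
    print(result)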