Update app.py
Browse files
app.py
CHANGED
@@ -5,13 +5,20 @@ from transformers import (
|
|
5 |
AutoModelForImageTextToText,
|
6 |
AutoTokenizer,
|
7 |
AutoProcessor,
|
8 |
-
BitsAndBytesConfig,
|
9 |
pipeline
|
10 |
)
|
11 |
from PIL import Image
|
12 |
import os
|
13 |
import spaces
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# Configuration
|
16 |
MODEL_4B = "google/medgemma-4b-it"
|
17 |
MODEL_27B = "google/medgemma-27b-text-it"
|
@@ -31,8 +38,11 @@ class MedGemmaApp:
|
|
31 |
"device_map": "auto",
|
32 |
}
|
33 |
|
34 |
-
if
|
|
|
35 |
model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
|
|
|
|
|
36 |
|
37 |
return model_kwargs
|
38 |
|
@@ -172,9 +182,10 @@ with gr.Blocks(title="MedGemma Medical AI Assistant", theme=gr.themes.Soft()) as
|
|
172 |
)
|
173 |
|
174 |
use_quantization = gr.Checkbox(
|
175 |
-
value=
|
176 |
-
label="Use 4-bit Quantization",
|
177 |
-
info="Reduces memory usage (
|
|
|
178 |
)
|
179 |
|
180 |
load_btn = gr.Button("🚀 Load Model", variant="primary")
|
|
|
5 |
AutoModelForImageTextToText,
|
6 |
AutoTokenizer,
|
7 |
AutoProcessor,
|
|
|
8 |
pipeline
|
9 |
)
|
10 |
from PIL import Image
|
11 |
import os
|
12 |
import spaces
|
13 |
|
14 |
+
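# Try to import BitsAndBytesConfig (optional; used for 4-bit quantization). NOTE(review): this is imported from transformers, so it may succeed even when the bitsandbytes package is missing - confirm the flag below really reflects bitsandbytes being installed.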
|
15 |
+
try:
|
16 |
+
from transformers import BitsAndBytesConfig
|
17 |
+
QUANTIZATION_AVAILABLE = True
|
18 |
+
except ImportError:
|
19 |
+
QUANTIZATION_AVAILABLE = False
|
20 |
+
print("⚠️ bitsandbytes not available. Quantization will be disabled.")
|
21 |
+
|
22 |
# Configuration
|
23 |
MODEL_4B = "google/medgemma-4b-it"
|
24 |
MODEL_27B = "google/medgemma-27b-text-it"
|
|
|
38 |
"device_map": "auto",
|
39 |
}
|
40 |
|
41 |
+
# Only add quantization if available and requested
|
42 |
+
if use_quantization and QUANTIZATION_AVAILABLE:
|
43 |
model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
|
44 |
+
elif use_quantization and not QUANTIZATION_AVAILABLE:
|
45 |
+
print("⚠️ Quantization requested but bitsandbytes not available. Loading without quantization.")
|
46 |
|
47 |
return model_kwargs
|
48 |
|
|
|
182 |
)
|
183 |
|
184 |
use_quantization = gr.Checkbox(
|
185 |
+
value=QUANTIZATION_AVAILABLE,
|
186 |
+
label="Use 4-bit Quantization" + ("" if QUANTIZATION_AVAILABLE else " (Unavailable)"),
|
187 |
+
info="Reduces memory usage" + ("" if QUANTIZATION_AVAILABLE else " - bitsandbytes not installed"),
|
188 |
+
interactive=QUANTIZATION_AVAILABLE
|
189 |
)
|
190 |
|
191 |
load_btn = gr.Button("🚀 Load Model", variant="primary")
|