Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -75,20 +75,37 @@ def load_vision_model():
|
|
75 |
print("Starting to load vision model...")
|
76 |
model_id = "microsoft/Phi-3.5-vision-instruct"
|
77 |
print(f"Loading model from {model_id}")
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
model = AutoModelForCausalLM.from_pretrained(
|
79 |
model_id,
|
80 |
trust_remote_code=True,
|
81 |
torch_dtype=torch.float16,
|
82 |
-
use_flash_attention_2=True,
|
|
|
|
|
83 |
)
|
84 |
print("Model loaded successfully")
|
|
|
85 |
print("Loading processor...")
|
86 |
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, num_crops=16)
|
87 |
print("Processor loaded successfully")
|
|
|
88 |
return model, processor
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
except Exception as e:
|
90 |
-
print(f"Error loading vision model: {str(e)}")
|
91 |
-
|
|
|
92 |
|
93 |
|
94 |
# Process audio input within a GPU-decorated function
|
|
|
75 |
print("Starting to load vision model...")
|
76 |
model_id = "microsoft/Phi-3.5-vision-instruct"
|
77 |
print(f"Loading model from {model_id}")
|
78 |
+
|
79 |
+
# Check for CUDA availability
|
80 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
81 |
+
print(f"Using device: {device}")
|
82 |
+
|
83 |
+
# Load model with potential memory optimization
|
84 |
model = AutoModelForCausalLM.from_pretrained(
|
85 |
model_id,
|
86 |
trust_remote_code=True,
|
87 |
torch_dtype=torch.float16,
|
88 |
+
use_flash_attention_2=True, # Enable if supported
|
89 |
+
device_map="auto", # Automatically manage model placement
|
90 |
+
low_cpu_mem_usage=True
|
91 |
)
|
92 |
print("Model loaded successfully")
|
93 |
+
|
94 |
print("Loading processor...")
|
95 |
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, num_crops=16)
|
96 |
print("Processor loaded successfully")
|
97 |
+
|
98 |
return model, processor
|
99 |
+
except ImportError as e:
|
100 |
+
print(f"Error importing required modules: {str(e)}")
|
101 |
+
print("Please ensure all required dependencies are installed.")
|
102 |
+
except RuntimeError as e:
|
103 |
+
print(f"Runtime error (possibly CUDA out of memory): {str(e)}")
|
104 |
+
print("Consider using a smaller model or enabling GPU offloading.")
|
105 |
except Exception as e:
|
106 |
+
print(f"Unexpected error in loading vision model: {str(e)}")
|
107 |
+
|
108 |
+
return None, None
|
109 |
|
110 |
|
111 |
# Process audio input within a GPU-decorated function
|