sagar007 committed on
Commit
7dbf49f
·
verified ·
1 Parent(s): 11cd804

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -3
app.py CHANGED
@@ -75,20 +75,37 @@ def load_vision_model():
75
  print("Starting to load vision model...")
76
  model_id = "microsoft/Phi-3.5-vision-instruct"
77
  print(f"Loading model from {model_id}")
 
 
 
 
 
 
78
  model = AutoModelForCausalLM.from_pretrained(
79
  model_id,
80
  trust_remote_code=True,
81
  torch_dtype=torch.float16,
82
- use_flash_attention_2=False
 
 
83
  )
84
  print("Model loaded successfully")
 
85
  print("Loading processor...")
86
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, num_crops=16)
87
  print("Processor loaded successfully")
 
88
  return model, processor
 
 
 
 
 
 
89
  except Exception as e:
90
- print(f"Detailed error in loading vision model: {str(e)}")
91
- return None, None
 
92
 
93
 
94
  # Process audio input within a GPU-decorated function
 
75
  print("Starting to load vision model...")
76
  model_id = "microsoft/Phi-3.5-vision-instruct"
77
  print(f"Loading model from {model_id}")
78
+
79
+ # Check for CUDA availability
80
+ device = "cuda" if torch.cuda.is_available() else "cpu"
81
+ print(f"Using device: {device}")
82
+
83
+ # Load model with potential memory optimization
84
  model = AutoModelForCausalLM.from_pretrained(
85
  model_id,
86
  trust_remote_code=True,
87
  torch_dtype=torch.float16,
88
+ use_flash_attention_2=True, # Enable if supported
89
+ device_map="auto", # Automatically manage model placement
90
+ low_cpu_mem_usage=True
91
  )
92
  print("Model loaded successfully")
93
+
94
  print("Loading processor...")
95
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, num_crops=16)
96
  print("Processor loaded successfully")
97
+
98
  return model, processor
99
+ except ImportError as e:
100
+ print(f"Error importing required modules: {str(e)}")
101
+ print("Please ensure all required dependencies are installed.")
102
+ except RuntimeError as e:
103
+ print(f"Runtime error (possibly CUDA out of memory): {str(e)}")
104
+ print("Consider using a smaller model or enabling GPU offloading.")
105
  except Exception as e:
106
+ print(f"Unexpected error in loading vision model: {str(e)}")
107
+
108
+ return None, None
109
 
110
 
111
  # Process audio input within a GPU-decorated function