sagar007 committed on
Commit
066eb01
·
verified ·
1 Parent(s): 4b9e6aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -20
app.py CHANGED
@@ -19,8 +19,13 @@ class LLaVAPhiModel:
19
  if self.tokenizer.pad_token is None:
20
  self.tokenizer.pad_token = self.tokenizer.eos_token
21
 
22
- # Initialize processor (can be done outside GPU context)
23
- self.processor = AutoProcessor.from_pretrained("microsoft/clip-vit-base-patch32")
 
 
 
 
 
24
 
25
  # Store conversation history
26
  self.history = []
@@ -52,16 +57,26 @@ class LLaVAPhiModel:
52
  self.model.config.pad_token_id = self.tokenizer.eos_token_id
53
 
54
  if self.clip is None:
55
- # Load CLIP model
56
- self.clip = AutoModel.from_pretrained("microsoft/clip-vit-base-patch32").to(self.device)
 
 
 
 
 
57
 
58
  @spaces.GPU
59
  def process_image(self, image):
60
- """Process image through CLIP"""
61
  try:
62
  # Ensure models are loaded
63
  self.ensure_models_loaded()
64
 
 
 
 
 
 
65
  # Convert image to correct format
66
  if isinstance(image, str):
67
  image = Image.open(image)
@@ -69,14 +84,18 @@ class LLaVAPhiModel:
69
  image = Image.fromarray(image)
70
 
71
  with torch.no_grad():
72
- image_inputs = self.processor(images=image, return_tensors="pt")
73
- image_features = self.clip.get_image_features(
74
- pixel_values=image_inputs.pixel_values.to(self.device)
75
- )
76
- return image_features
 
 
 
 
77
  except Exception as e:
78
- logging.error(f"Error processing image: {str(e)}")
79
- raise
80
 
81
  @spaces.GPU(duration=120) # Set longer duration for generation
82
  def generate_response(self, message, image=None):
@@ -85,14 +104,10 @@ class LLaVAPhiModel:
85
  self.ensure_models_loaded()
86
 
87
  if image is not None:
88
- try:
89
- image_features = self.process_image(image)
90
- has_image = True
91
- except Exception as e:
92
- logging.error(f"Failed to process image: {str(e)}")
93
- image_features = None
94
- has_image = False
95
- message = f"Note: Failed to process image. Continuing with text only. Error: {str(e)}\n{message}"
96
 
97
  prompt = f"human: {'<image>' if has_image else ''}\n{message}\ngpt:"
98
  context = ""
 
19
  if self.tokenizer.pad_token is None:
20
  self.tokenizer.pad_token = self.tokenizer.eos_token
21
 
22
+ try:
23
+ # Initialize processor (can be done outside GPU context)
24
+ self.processor = AutoProcessor.from_pretrained("huggingface/clip-vit-base-patch32")
25
+ except Exception as e:
26
+ logging.warning(f"Failed to load CLIP processor: {str(e)}")
27
+ # Fallback to basic tokenizer if needed
28
+ self.processor = None
29
 
30
  # Store conversation history
31
  self.history = []
 
57
  self.model.config.pad_token_id = self.tokenizer.eos_token_id
58
 
59
  if self.clip is None:
60
+ # Load CLIP model if not already loaded
61
+ if self.clip is None:
62
+ try:
63
+ self.clip = AutoModel.from_pretrained("huggingface/clip-vit-base-patch32").to(self.device)
64
+ except Exception as e:
65
+ logging.warning(f"Failed to load CLIP model: {str(e)}")
66
+ self.clip = None
67
 
68
  @spaces.GPU
69
  def process_image(self, image):
70
+ """Process image through CLIP if available, otherwise return None"""
71
  try:
72
  # Ensure models are loaded
73
  self.ensure_models_loaded()
74
 
75
+ # If CLIP isn't available, return None
76
+ if self.clip is None or self.processor is None:
77
+ logging.warning("CLIP model or processor not available - skipping image processing")
78
+ return None
79
+
80
  # Convert image to correct format
81
  if isinstance(image, str):
82
  image = Image.open(image)
 
84
  image = Image.fromarray(image)
85
 
86
  with torch.no_grad():
87
+ try:
88
+ image_inputs = self.processor(images=image, return_tensors="pt")
89
+ image_features = self.clip.get_image_features(
90
+ pixel_values=image_inputs.pixel_values.to(self.device)
91
+ )
92
+ return image_features
93
+ except Exception as e:
94
+ logging.error(f"Error during image processing: {str(e)}")
95
+ return None
96
  except Exception as e:
97
+ logging.error(f"Error in process_image: {str(e)}")
98
+ return None
99
 
100
  @spaces.GPU(duration=120) # Set longer duration for generation
101
  def generate_response(self, message, image=None):
 
104
  self.ensure_models_loaded()
105
 
106
  if image is not None:
107
+ image_features = self.process_image(image)
108
+ has_image = image_features is not None
109
+ if not has_image:
110
+ message = "Note: Image processing is not available - continuing with text only.\n" + message
 
 
 
 
111
 
112
  prompt = f"human: {'<image>' if has_image else ''}\n{message}\ngpt:"
113
  context = ""