sagar007 committed
Commit 94ee0c6 · verified · 1 Parent(s): 066eb01

Update app.py

Files changed (1):
  1. app.py +40 -33
app.py CHANGED
@@ -1,36 +1,34 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor, AutoModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, CLIPProcessor, CLIPModel
 from PIL import Image
 import logging
 import spaces
+import numpy

 # Setup logging
 logging.basicConfig(level=logging.INFO)

 class LLaVAPhiModel:
     def __init__(self, model_id="sagar007/Lava_phi"):
-        self.device = "cuda"  # Always use cuda with ZeroGPU
+        self.device = "cuda"
         self.model_id = model_id
         logging.info("Initializing LLaVA-Phi model...")

-        # Initialize tokenizer (can be done outside GPU context)
+        # Initialize tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token

         try:
-            # Initialize processor (can be done outside GPU context)
-            self.processor = AutoProcessor.from_pretrained("huggingface/clip-vit-base-patch32")
+            # Use CLIPProcessor directly instead of AutoProcessor
+            self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+            logging.info("Successfully loaded CLIP processor")
         except Exception as e:
-            logging.warning(f"Failed to load CLIP processor: {str(e)}")
-            # Fallback to basic tokenizer if needed
+            logging.error(f"Failed to load CLIP processor: {str(e)}")
             self.processor = None

-        # Store conversation history
         self.history = []
-
-        # Lazy loading of models - will be initialized in GPU context
         self.model = None
         self.clip = None

@@ -38,7 +36,7 @@ class LLaVAPhiModel:
     def ensure_models_loaded(self):
         """Ensure models are loaded in GPU context"""
         if self.model is None:
-            # Load main model
+            # Load main model with updated quantization config
             from transformers import BitsAndBytesConfig
             quantization_config = BitsAndBytesConfig(
                 load_in_4bit=True,
@@ -47,34 +45,37 @@ class LLaVAPhiModel:
                 bnb_4bit_quant_type="nf4"
             )

-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.model_id,
-                quantization_config=quantization_config,
-                device_map="auto",
-                torch_dtype=torch.bfloat16,
-                trust_remote_code=True
-            )
-            self.model.config.pad_token_id = self.tokenizer.eos_token_id
+            try:
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_id,
+                    quantization_config=quantization_config,
+                    device_map="auto",
+                    torch_dtype=torch.bfloat16,
+                    trust_remote_code=True
+                )
+                self.model.config.pad_token_id = self.tokenizer.eos_token_id
+                logging.info("Successfully loaded main model")
+            except Exception as e:
+                logging.error(f"Failed to load main model: {str(e)}")
+                raise

         if self.clip is None:
-            # Load CLIP model if not already loaded
-            if self.clip is None:
-                try:
-                    self.clip = AutoModel.from_pretrained("huggingface/clip-vit-base-patch32").to(self.device)
-                except Exception as e:
-                    logging.warning(f"Failed to load CLIP model: {str(e)}")
-                    self.clip = None
+            try:
+                # Use CLIPModel directly instead of AutoModel
+                self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(self.device)
+                logging.info("Successfully loaded CLIP model")
+            except Exception as e:
+                logging.error(f"Failed to load CLIP model: {str(e)}")
+                self.clip = None

     @spaces.GPU
     def process_image(self, image):
-        """Process image through CLIP if available, otherwise return None"""
+        """Process image through CLIP if available"""
         try:
-            # Ensure models are loaded
             self.ensure_models_loaded()

-            # If CLIP isn't available, return None
             if self.clip is None or self.processor is None:
-                logging.warning("CLIP model or processor not available - skipping image processing")
+                logging.warning("CLIP model or processor not available")
                 return None

             # Convert image to correct format
@@ -83,12 +84,18 @@ class LLaVAPhiModel:
             elif isinstance(image, numpy.ndarray):
                 image = Image.fromarray(image)

+            # Ensure image is in RGB mode
+            if image.mode != 'RGB':
+                image = image.convert('RGB')
+
             with torch.no_grad():
                 try:
+                    # Process image with error handling
                     image_inputs = self.processor(images=image, return_tensors="pt")
                     image_features = self.clip.get_image_features(
                         pixel_values=image_inputs.pixel_values.to(self.device)
                     )
+                    logging.info("Successfully processed image through CLIP")
                     return image_features
                 except Exception as e:
                     logging.error(f"Error during image processing: {str(e)}")
@@ -97,10 +104,9 @@ class LLaVAPhiModel:
             logging.error(f"Error in process_image: {str(e)}")
             return None

-    @spaces.GPU(duration=120)  # Set longer duration for generation
+    @spaces.GPU(duration=120)
     def generate_response(self, message, image=None):
         try:
-            # Ensure models are loaded
             self.ensure_models_loaded()

             if image is not None:
@@ -176,6 +182,7 @@ class LLaVAPhiModel:

             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

+            # Clean up response
             if "gpt:" in response:
                 response = response.split("gpt:")[-1].strip()
             if "human:" in response:
@@ -190,7 +197,7 @@ class LLaVAPhiModel:
             logging.error(f"Error generating response: {str(e)}")
             logging.error(f"Full traceback:", exc_info=True)
             return f"Error: {str(e)}"
-
+
     def clear_history(self):
         self.history = []
         return None
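
For reference, below is a minimal sketch of how the updated LLaVAPhiModel class could be driven from a Gradio UI on this Space. The handler names, layout, and wiring (chat_fn, clear_fn, the Blocks structure) are illustrative assumptions, not the actual contents of app.py, which is not fully shown in this diff.

# --- Illustrative usage sketch (assumed wiring, not part of the commit) ---
import gradio as gr

model = LLaVAPhiModel()  # defaults to model_id="sagar007/Lava_phi"

def chat_fn(message, image, chat_history):
    # generate_response accepts an optional PIL image alongside the text prompt
    reply = model.generate_response(message, image)
    chat_history = chat_history + [(message, reply)]
    return "", chat_history

def clear_fn():
    # Reset the model's internal conversation history and the visible chat
    model.clear_history()
    return []

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    image_input = gr.Image(type="pil", label="Optional image")
    msg = gr.Textbox(label="Message")
    clear_btn = gr.Button("Clear history")

    msg.submit(chat_fn, inputs=[msg, image_input, chatbot], outputs=[msg, chatbot])
    clear_btn.click(clear_fn, outputs=chatbot)

demo.launch()

Because generate_response is decorated with @spaces.GPU(duration=120), the heavy model loading in ensure_models_loaded only happens inside the ZeroGPU context on the first request, which is why the constructor above stays cheap.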