Spaces:

sagar007
/

Lava_phi_model

Running

App Files Files Community

sagar007 committed on Jan 2

Commit

2144e66

verified ·

1 Parent(s): 5c998e9

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -84

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor, AutoConfig, AutoModel
 from PIL import Image
 import logging
-from transformers import BitsAndBytesConfig
 # Setup logging
 logging.basicConfig(level=logging.INFO)
@@ -13,24 +12,36 @@ class LLaVAPhiModel:
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logging.info(f"Using device: {self.device}")
-        # Initialize quantization config
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4"
-        )
         try:
-            # Load model directly from Hugging Face Hub
             logging.info(f"Loading model from {model_id}...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_id,
-                quantization_config=quantization_config,
-                device_map="auto",
-                torch_dtype=torch.bfloat16,
-                trust_remote_code=True
             )
             self.tokenizer = AutoTokenizer.from_pretrained(model_id)
             # Set up padding token
@@ -49,24 +60,41 @@ class LLaVAPhiModel:
         except Exception as e:
             logging.error(f"Error initializing model: {str(e)}")
             raise
     def process_image(self, image):
         """Process image through CLIP"""
-        with torch.no_grad():
-            image_inputs = self.processor(images=image, return_tensors="pt")
-            image_features = self.clip.get_image_features(
-                pixel_values=image_inputs.pixel_values.to(self.device)
-            )
-            return image_features
     def generate_response(self, message, image=None):
         try:
             if image is not None:
-                # Get image features
-                image_features = self.process_image(image)
                 # Format prompt
-                prompt = f"human: <image>\n{message}\ngpt:"
                 # Add context from history
                 context = ""
@@ -85,8 +113,9 @@ class LLaVAPhiModel:
                 )
                 inputs = {k: v.to(self.device) for k, v in inputs.items()}
-                # Add image features to inputs
-                inputs["image_features"] = image_features
                 # Generate response
                 with torch.no_grad():
@@ -163,63 +192,67 @@ class LLaVAPhiModel:
         return None
 def create_demo():
-    # Initialize model
-    model = LLaVAPhiModel()
-    with gr.Blocks(css="footer {visibility: hidden}") as demo:
-        gr.Markdown(
-            """
-            # LLaVA-Phi Demo
-            Chat with a vision-language model that can understand both text and images.
-            """
-        )
-        chatbot = gr.Chatbot(height=400)
-        with gr.Row():
-            with gr.Column(scale=0.7):
-                msg = gr.Textbox(
-                    show_label=False,
-                    placeholder="Enter text and/or upload an image",
-                    container=False
-                )
-            with gr.Column(scale=0.15, min_width=0):
-                clear = gr.Button("Clear")
-            with gr.Column(scale=0.15, min_width=0):
-                submit = gr.Button("Submit", variant="primary")
-        image = gr.Image(type="pil", label="Upload Image (Optional)")
-        def respond(message, chat_history, image):
-            if not message and image is None:
-                return chat_history
-            response = model.generate_response(message, image)
-            chat_history.append((message, response))
-            return "", chat_history
-        def clear_chat():
-            model.clear_history()
-            return None, None
-        submit.click(
-            respond,
-            [msg, chatbot, image],
-            [msg, chatbot],
-        )
-        clear.click(
-            clear_chat,
-            None,
-            [chatbot, image],
-        )
-        msg.submit(
-            respond,
-            [msg, chatbot, image],
-            [msg, chatbot],
-        )
-    return demo
 if __name__ == "__main__":
     demo = create_demo()
@@ -227,4 +260,4 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=7860,
         share=True
-    )

 import gradio as gr
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor, AutoModel
 from PIL import Image
 import logging
 # Setup logging
 logging.basicConfig(level=logging.INFO)
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logging.info(f"Using device: {self.device}")
         try:
+            # Load model with appropriate settings based on available hardware
             logging.info(f"Loading model from {model_id}...")
+            # Determine model loading configuration
+            model_kwargs = {
+                "device_map": "auto",
+                "trust_remote_code": True
+            }
+            # Add quantization only if CUDA is available
+            if torch.cuda.is_available():
+                from transformers import BitsAndBytesConfig
+                quantization_config = BitsAndBytesConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_compute_dtype=torch.float16,
+                    bnb_4bit_use_double_quant=True,
+                    bnb_4bit_quant_type="nf4"
+                )
+                model_kwargs["quantization_config"] = quantization_config
+                model_kwargs["torch_dtype"] = torch.bfloat16
+            else:
+                # For CPU, use lighter configuration
+                model_kwargs["torch_dtype"] = torch.float32
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_id,
+                **model_kwargs
             )
             self.tokenizer = AutoTokenizer.from_pretrained(model_id)
             # Set up padding token
         except Exception as e:
             logging.error(f"Error initializing model: {str(e)}")
             raise
     def process_image(self, image):
         """Compute image features for *image* with the CLIP processor and model.

         Args:
             image: A file path (opened with ``Image.open``), a NumPy array
                 (converted with ``Image.fromarray``), or any other object,
                 which is passed to ``self.processor`` unchanged.

         Returns:
             The value returned by ``self.clip.get_image_features``.

         Raises:
             Exception: Any error raised while loading or encoding the image
                 is logged and then re-raised.
         """
         # Imported locally because numpy is not among the imports visible at
         # the top of this file; the bare ``numpy.ndarray`` reference would
         # otherwise raise NameError for every non-string input.
         import numpy as np

         try:
             # Normalise the supported input kinds to a PIL image.
             if isinstance(image, str):  # If image path is provided
                 image = Image.open(image)
             elif isinstance(image, np.ndarray):  # If numpy array (from gradio)
                 image = Image.fromarray(image)
             # Inference only: no gradients are needed for feature extraction.
             with torch.no_grad():
                 image_inputs = self.processor(images=image, return_tensors="pt")
                 pixel_values = image_inputs.pixel_values.to(self.device)
                 return self.clip.get_image_features(pixel_values=pixel_values)
         except Exception as e:
             logging.error(f"Error processing image: {str(e)}")
             raise
     def generate_response(self, message, image=None):
         try:
             if image is not None:
+                try:
+                    # Get image features
+                    image_features = self.process_image(image)
+                    has_image = True
+                except Exception as e:
+                    logging.error(f"Failed to process image: {str(e)}")
+                    image_features = None
+                    has_image = False
+                    message = f"Note: Failed to process image. Continuing with text only. Error: {str(e)}\n{message}"
                 # Format prompt
+                prompt = f"human: {'<image>' if has_image else ''}\n{message}\ngpt:"
                 # Add context from history
                 context = ""
                 )
                 inputs = {k: v.to(self.device) for k, v in inputs.items()}
+                # Add image features to inputs if available
+                if has_image:
+                    inputs["image_features"] = image_features
                 # Generate response
                 with torch.no_grad():
         return None
 def create_demo():
     """Build the Gradio Blocks chat interface backed by ``LLaVAPhiModel``.

     Creates the model, lays out a chatbot, a text box, Clear/Submit buttons
     and an optional image input, and wires the button and text-box events
     to the ``respond`` and ``clear_chat`` callbacks.

     Returns:
         The constructed ``gr.Blocks`` demo.

     Raises:
         Exception: Any error raised while creating the model or the UI is
             logged and then re-raised.
     """
     try:
         # Initialize model
         model = LLaVAPhiModel()
         with gr.Blocks(css="footer {visibility: hidden}") as demo:
             gr.Markdown(
                 """
                 # LLaVA-Phi Demo
                 Chat with a vision-language model that can understand both text and images.
                 """
             )
             chatbot = gr.Chatbot(height=400)
             with gr.Row():
                 with gr.Column(scale=0.7):
                     msg = gr.Textbox(
                         show_label=False,
                         placeholder="Enter text and/or upload an image",
                         container=False
                     )
                 with gr.Column(scale=0.15, min_width=0):
                     clear = gr.Button("Clear")
                 with gr.Column(scale=0.15, min_width=0):
                     submit = gr.Button("Submit", variant="primary")
             image = gr.Image(type="pil", label="Upload Image (Optional)")

             def respond(message, chat_history, image):
                 """Return the cleared text box value and the updated history."""
                 # clear_chat resets the chatbot to None, so the history may
                 # arrive as None; normalise it before appending.
                 chat_history = chat_history or []
                 if not message and image is None:
                     # Two outputs (msg, chatbot) are wired to this callback,
                     # so the no-op path must also return two values.
                     return "", chat_history
                 response = model.generate_response(message, image)
                 chat_history.append((message, response))
                 return "", chat_history

             def clear_chat():
                 """Reset the model history and clear the chatbot and image."""
                 model.clear_history()
                 return None, None

             # Submit button and Enter in the text box trigger the same handler.
             submit.click(
                 respond,
                 [msg, chatbot, image],
                 [msg, chatbot],
             )
             clear.click(
                 clear_chat,
                 None,
                 [chatbot, image],
             )
             msg.submit(
                 respond,
                 [msg, chatbot, image],
                 [msg, chatbot],
             )
         return demo
     except Exception as e:
         logging.error(f"Error creating demo: {str(e)}")
         raise
 if __name__ == "__main__":
     demo = create_demo()
         server_name="0.0.0.0",
         server_port=7860,
         share=True
+    )