Update app.py
app.py CHANGED
@@ -31,23 +31,16 @@ print(f"Using device: {device}")
 # Define the model path
 model_path = "Veda0718/llava-med-v1.5-mistral-7b-finetuned"
 
-
-
-
-
-
-
-
-
-
-
-    model = model.to(device)
-
-    # Enable gradient checkpointing to save memory
-    model.gradient_checkpointing_enable()
-except Exception as e:
-    print(f"Error loading model: {e}")
-    tokenizer, model, image_processor, context_len = None, None, None, None
+kwargs = {"device_map": "auto"}
+kwargs['load_in_4bit'] = True
+kwargs['quantization_config'] = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type='nf4'
+)
+model = LlavaMistralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
+tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
 
 # Define the inference function
 def run_inference(image, question):
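In effect, the commit swaps the old fp16 loading path (a manual `model.to(device)` plus gradient checkpointing inside a try/except) for a 4-bit NF4 quantized load via bitsandbytes, with `device_map="auto"` handling device placement. Below is a minimal self-contained sketch of the new loading path, assuming the LLaVA code base (which provides `LlavaMistralForCausalLM`), `transformers`, `bitsandbytes`, and `accelerate` are installed; the `llava.model` import path is an assumption about this Space's environment, not part of the commit.

```python
# Minimal sketch of the new 4-bit loading path (assumptions noted inline).
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig
# Assumption: LlavaMistralForCausalLM comes from the LLaVA repo;
# adjust the import to match your install.
from llava.model import LlavaMistralForCausalLM

model_path = "Veda0718/llava-med-v1.5-mistral-7b-finetuned"

kwargs = {
    "device_map": "auto",  # let accelerate place weights; replaces the manual .to(device)
    "quantization_config": BitsAndBytesConfig(
        load_in_4bit=True,                     # store weights in 4 bits
        bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
        bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
        bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    ),
}
# Note: recent transformers versions reject a separate load_in_4bit=True kwarg
# when quantization_config is also passed, so the config alone carries the setting here.

model = LlavaMistralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
```

Gradient checkpointing only saves activation memory during training, so dropping it costs an inference Space nothing; it is the quantized load that shrinks the footprint, from roughly 14 GB of fp16 weights for a 7B model to around 4 GB in NF4.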