Theresa Hoesl committed
Commit 93fe578 · 1 Parent(s): 9c587b3

added torch type in load_model

Files changed (1):
  app.py  +10 -3
app.py CHANGED
@@ -93,19 +93,26 @@ import torch
 # Load the model and tokenizer
 def load_model():
     lora_model_name = "sreyanghosh/lora_model"  # Replace with your LoRA model path
+
+    # Try loading without 4-bit quantization
     model = AutoPeftModelForCausalLM.from_pretrained(
         lora_model_name,
-        load_in_4bit=False,
-    )
+        torch_dtype=torch.float32,  # Ensure no low-bit quantization
+        device_map="auto" if torch.cuda.is_available() else None,  # Use standard device mapping
+        load_in_4bit=False,  # Redundant, but safe to explicitly specify
+    )
+
     tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
+
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token_id = tokenizer.eos_token_id
+
     model.eval()
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = model.to(device)
+
     return tokenizer, model
 
-tokenizer, model = load_model()
 
 # Define the respond function
 def respond(
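
For context, a minimal sketch of how the post-commit loader could be exercised for a single generation. The load_model() body mirrors the "+" side of the diff above; the module-level call, the example prompt, and the generation settings (max_new_tokens) are illustrative assumptions, not part of this commit.

# Illustrative usage sketch only; load_model() below is reproduced from the new side of the diff.
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

def load_model():
    lora_model_name = "sreyanghosh/lora_model"  # Replace with your LoRA model path
    model = AutoPeftModelForCausalLM.from_pretrained(
        lora_model_name,
        torch_dtype=torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        load_in_4bit=False,
    )
    tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    return tokenizer, model

# Assumed driver code, not from the commit: load once, then generate a short reply.
tokenizer, model = load_model()
prompt = "Hello, how are you?"  # example prompt, assumption
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=64,                      # assumed generation budget
        pad_token_id=tokenizer.pad_token_id,    # pad id set in load_model()
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))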