Theresa Hoesl committed
Commit 93fe578 · 1 Parent(s): 9c587b3

added torch type in load_model

Files changed (1):
  app.py  +10 -3
app.py CHANGED
@@ -93,19 +93,26 @@ import torch
 # Load the model and tokenizer
 def load_model():
     lora_model_name = "sreyanghosh/lora_model"  # Replace with your LoRA model path
+
+    # Try loading without 4-bit quantization
     model = AutoPeftModelForCausalLM.from_pretrained(
         lora_model_name,
-        load_in_4bit=False,
-    )
+        torch_dtype=torch.float32,  # Ensure no low-bit quantization
+        device_map="auto" if torch.cuda.is_available() else None,  # Use standard device mapping
+        load_in_4bit=False,  # Redundant, but safe to explicitly specify
+    )
+
     tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
+
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token_id = tokenizer.eos_token_id
+
     model.eval()
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = model.to(device)
+
     return tokenizer, model
 
-tokenizer, model = load_model()
 
 # Define the respond function
 def respond(
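
For context, a minimal sketch of how the post-commit loader could be exercised for a single generation. The load_model() body mirrors the "+" side of the diff above; the module-level call, the example prompt, and the generation settings (max_new_tokens) are illustrative assumptions, not part of this commit.

# Illustrative usage sketch only; load_model() below is reproduced from the new side of the diff.
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

def load_model():
    lora_model_name = "sreyanghosh/lora_model"  # Replace with your LoRA model path
    model = AutoPeftModelForCausalLM.from_pretrained(
        lora_model_name,
        torch_dtype=torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        load_in_4bit=False,
    )
    tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    return tokenizer, model

# Assumed driver code, not from the commit: load once, then generate a short reply.
tokenizer, model = load_model()
prompt = "Hello, how are you?"  # example prompt, assumption
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=64,                      # assumed generation budget
        pad_token_id=tokenizer.pad_token_id,    # pad id set in load_model()
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))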