Theresa Hoesl committed
Commit · 93fe578 · 1 Parent(s): 9c587b3

added torch type in load_model
app.py CHANGED
@@ -93,19 +93,26 @@ import torch
 # Load the model and tokenizer
 def load_model():
     lora_model_name = "sreyanghosh/lora_model"  # Replace with your LoRA model path
+
+    # Try loading without 4-bit quantization
     model = AutoPeftModelForCausalLM.from_pretrained(
         lora_model_name,
-
-
+        torch_dtype=torch.float32,  # Ensure no low-bit quantization
+        device_map="auto" if torch.cuda.is_available() else None,  # Use standard device mapping
+        load_in_4bit=False,  # Redundant, but safe to explicitly specify
+    )
+
     tokenizer = AutoTokenizer.from_pretrained(lora_model_name)
+
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token_id = tokenizer.eos_token_id
+
     model.eval()
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = model.to(device)
+
     return tokenizer, model
 
-tokenizer, model = load_model()
 
 # Define the respond function
 def respond(
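
For reference, here is the load_model path as it reads after this commit, assembled into a self-contained sketch. The imports are assumptions based on the hunk header (app.py already imports torch) and the classes the function uses; everything else is taken from the added lines above. Note that recent transformers releases prefer a quantization_config (e.g. BitsAndBytesConfig) over the load_in_4bit flag, which is kept here only to match the commit.

import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Load the model and tokenizer
def load_model():
    lora_model_name = "sreyanghosh/lora_model"  # Replace with your LoRA model path

    # Try loading without 4-bit quantization
    model = AutoPeftModelForCausalLM.from_pretrained(
        lora_model_name,
        torch_dtype=torch.float32,  # full precision, no low-bit quantization
        device_map="auto" if torch.cuda.is_available() else None,
        load_in_4bit=False,  # redundant (False is the default), explicit per the commit
    )

    tokenizer = AutoTokenizer.from_pretrained(lora_model_name)

    # Many causal-LM checkpoints ship without a pad token; reuse EOS
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)  # with device_map="auto" the model may already sit on the GPU

    return tokenizer, model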
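
A quick smoke test of the returned pair; the prompt text and generation settings are illustrative assumptions, not part of the commit:

tokenizer, model = load_model()

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=32, pad_token_id=tokenizer.pad_token_id)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))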