Spaces: Runtime error
Braszczynski committed
Commit 32ab136
1 Parent(s): e1c82eb
Update app.py
app.py CHANGED
@@ -15,12 +15,10 @@ load_in_4bit = True # Set to True if you want to use 4-bit quantization
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
 
 # Load the base model with adapters
-model = AutoAdapterModel.from_pretrained(model_name, low_cpu_mem_usage=True)
+model = AutoAdapterModel.from_pretrained(model_name, low_cpu_mem_usage=True).to("cuda")
 model.load_adapter(lora_adapter)
 
-
-device = torch.device("cpu")
-model.to(device)
+
 
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     # Combine system message and chat history
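The new line moves the base model to CUDA unconditionally; if the Space runs on CPU-only hardware, .to("cuda") raises at startup. Below is a minimal sketch (not the committed code) of a device-agnostic variant of the same loading step; model_name and lora_adapter are hypothetical placeholders standing in for the values defined earlier in app.py.

# Sketch only: device-agnostic variant of the loading step shown in this diff.
# model_name and lora_adapter are hypothetical placeholders, not the Space's real IDs.
import torch
from transformers import AutoTokenizer
from adapters import AutoAdapterModel  # assumes the standalone `adapters` package

model_name = "your-base-model-id"      # placeholder
lora_adapter = "your-lora-adapter-id"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Pick the GPU when one is available, otherwise stay on CPU so the app still starts
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the base model with adapters and move it to the selected device
model = AutoAdapterModel.from_pretrained(model_name, low_cpu_mem_usage=True).to(device)
model.load_adapter(lora_adapter)

With this pattern the app still launches on CPU hardware and automatically uses the GPU when the Space is upgraded.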