Braszczynski committed on
Commit
32ab136
1 Parent(s): e1c82eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -15,12 +15,10 @@ load_in_4bit = True # Set to True if you want to use 4-bit quantization
15
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
16
 
17
  # Load the base model with adapters
18
- model = AutoAdapterModel.from_pretrained(model_name, low_cpu_mem_usage=True)
19
  model.load_adapter(lora_adapter)
20
 
21
- # Move the model to CPU
22
- device = torch.device("cpu")
23
- model.to(device)
24
 
25
  def respond(message, history, system_message, max_tokens, temperature, top_p):
26
  # Combine system message and chat history
 
15
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
16
 
17
  # Load the base model with adapters
18
+ model = AutoAdapterModel.from_pretrained(model_name, low_cpu_mem_usage=True).to("cuda")
19
  model.load_adapter(lora_adapter)
20
 
21
+
 
 
22
 
23
  def respond(message, history, system_message, max_tokens, temperature, top_p):
24
  # Combine system message and chat history