Braszczynski committed on
Commit
d086380
1 Parent(s): 5f8dba4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -19,6 +19,8 @@ model, tokenizer = FastLanguageModel.from_pretrained(
19
  load_in_4bit = load_in_4bit,
20
  )
21
  FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 
 
22
 
23
  def respond(message, history, system_message, max_tokens, temperature, top_p):
24
  # Combine system message and chat history
@@ -33,7 +35,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
33
  return_tensors="pt",
34
  truncation=True,
35
  max_length=max_seq_length,
36
- ).to("cuda")
37
 
38
  # Generate the response
39
  with torch.no_grad():
 
19
  load_in_4bit = load_in_4bit,
20
  )
21
  FastLanguageModel.for_inference(model) # Enable native 2x faster inference
22
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
23
+ model = model.to(device)
24
 
25
  def respond(message, history, system_message, max_tokens, temperature, top_p):
26
  # Combine system message and chat history
 
35
  return_tensors="pt",
36
  truncation=True,
37
  max_length=max_seq_length,
38
+ ).to(device)
39
 
40
  # Generate the response
41
  with torch.no_grad():