mherrador committed
Commit 00fb0dc · verified · parent: b746128

Update app.py

Files changed (1)
  1. app.py +8 -4
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 
 # Load your model and tokenizer using the adapter weights
-model_name = "mherrador/CE5.0_expert_v2" # Replace with your actual model name
+model_name = "mherrador/CE5.0_expert_v2"
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_use_double_quant=True,
@@ -11,17 +11,21 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.bfloat16,
 )
 
+# Explicitly set device to CPU
+device = torch.device("cpu")
+
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     quantization_config=bnb_config,
-    device_map="auto",
+    # device_map="auto", # Let Transformers choose the best device
     trust_remote_code=True,
-)
+).to(device) # Move model to the specified device
+
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 # Function to generate recommendations
 def generate_recommendations(input_text):
-    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
+    inputs = tokenizer(input_text, return_tensors="pt").to(device) # Move input to device
     outputs = model.generate(**inputs, max_new_tokens=128)
     recommendations = tokenizer.batch_decode(outputs)[0]
     return recommendations
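
For reference, below is a minimal sketch of how the resulting app.py could pick its device at runtime instead of hard-coding CPU. The torch.cuda.is_available() check, the full-precision CPU fallback, and skip_special_tokens=True are assumptions added for illustration, not part of the commit; bitsandbytes 4-bit loading generally expects a CUDA GPU, which is why this sketch only passes quantization_config when CUDA is present.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "mherrador/CE5.0_expert_v2"
# Assumption: choose the device at runtime rather than hard-coding CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    # 4-bit double quantization, as in the commit (needs a CUDA GPU)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        trust_remote_code=True,
    )
else:
    # Assumption: fall back to full precision, since bitsandbytes 4-bit
    # quantization is generally unavailable on CPU
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
    ).to(device)

tokenizer = AutoTokenizer.from_pretrained(model_name)

def generate_recommendations(input_text):
    # Keep inputs on the same device as the model
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=128)
    # skip_special_tokens=True is an optional addition over the commit
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

Usage is the same as in the committed script, e.g. print(generate_recommendations("example prompt")) with any prompt text.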