mherrador committed
Commit 00fb0dc · verified · parent: b746128

Update app.py

Files changed (1)
  1. app.py +8 -4
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 
 # Load your model and tokenizer using the adapter weights
-model_name = "mherrador/CE5.0_expert_v2" # Replace with your actual model name
+model_name = "mherrador/CE5.0_expert_v2"
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_use_double_quant=True,
@@ -11,17 +11,21 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.bfloat16,
 )
 
+# Explicitly set device to CPU
+device = torch.device("cpu")
+
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     quantization_config=bnb_config,
-    device_map="auto",
+    # device_map="auto", # Let Transformers choose the best device
     trust_remote_code=True,
-)
+).to(device) # Move model to the specified device
+
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 # Function to generate recommendations
 def generate_recommendations(input_text):
-    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
+    inputs = tokenizer(input_text, return_tensors="pt").to(device) # Move input to device
     outputs = model.generate(**inputs, max_new_tokens=128)
     recommendations = tokenizer.batch_decode(outputs)[0]
     return recommendations
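
For reference, below is a minimal sketch of how the resulting app.py could pick its device at runtime instead of hard-coding CPU. The torch.cuda.is_available() check, the full-precision CPU fallback, and skip_special_tokens=True are assumptions added for illustration, not part of the commit; bitsandbytes 4-bit loading generally expects a CUDA GPU, which is why this sketch only passes quantization_config when CUDA is present.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "mherrador/CE5.0_expert_v2"
# Assumption: choose the device at runtime rather than hard-coding CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    # 4-bit double quantization, as in the commit (needs a CUDA GPU)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        trust_remote_code=True,
    )
else:
    # Assumption: fall back to full precision, since bitsandbytes 4-bit
    # quantization is generally unavailable on CPU
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
    ).to(device)

tokenizer = AutoTokenizer.from_pretrained(model_name)

def generate_recommendations(input_text):
    # Keep inputs on the same device as the model
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=128)
    # skip_special_tokens=True is an optional addition over the commit
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

Usage is the same as in the committed script, e.g. print(generate_recommendations("example prompt")) with any prompt text.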