Spaces:

oskaralf
/

Lab2

Runtime error

oskaralf committed on Dec 2, 2024

Commit

14fafef

1 Parent(s): e27b9eb

try

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,27 +1,27 @@
 import torch
 from unsloth import FastLanguageModel
-# Check if CUDA is available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load the base model
 base_model_name = "unsloth/Llama-3.2-3B-Instruct"
 base_model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=base_model_name,
     max_seq_length=2048,
-    dtype=None,  # Auto-detect data type
-    load_in_4bit=False,  # Disable 4-bit quantization for CPU
 )
 base_model.to(device)
-# Apply LoRA adapters
 from peft import PeftModel
 lora_model_name = "oskaralf/lora_model"  # Replace with your LoRA model path
 model = PeftModel.from_pretrained(base_model, lora_model_name)
 model.to(device)
-# Prepare for inference
 FastLanguageModel.for_inference(model)
 # Gradio interface

 import torch
 from unsloth import FastLanguageModel
+# Force CPU mode
+device = "cpu"
+# Load the base model in CPU mode
 base_model_name = "unsloth/Llama-3.2-3B-Instruct"
 base_model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=base_model_name,
     max_seq_length=2048,
+    dtype="float32",  # Use float32 for CPU
+    load_in_4bit=False  # Disable 4-bit quantization for CPU
 )
 base_model.to(device)
+# Apply LoRA adapters in CPU mode
 from peft import PeftModel
 lora_model_name = "oskaralf/lora_model"  # Replace with your LoRA model path
 model = PeftModel.from_pretrained(base_model, lora_model_name)
 model.to(device)
+# Prepare for inference in CPU mode
 FastLanguageModel.for_inference(model)
 # Gradio interface