oskaralf committed
Commit 14fafef · 1 Parent(s): e27b9eb
Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -1,27 +1,27 @@
 import torch
 from unsloth import FastLanguageModel
 
-# Check if CUDA is available
-device = "cuda" if torch.cuda.is_available() else "cpu"
+# Force CPU mode
+device = "cpu"
 
-# Load the base model
+# Load the base model in CPU mode
 base_model_name = "unsloth/Llama-3.2-3B-Instruct"
 base_model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=base_model_name,
     max_seq_length=2048,
-    dtype=None,  # Auto-detect data type
-    load_in_4bit=False,  # Disable 4-bit quantization for CPU
+    dtype="float32",  # Use float32 for CPU
+    load_in_4bit=False  # Disable 4-bit quantization for CPU
 )
 base_model.to(device)
 
-# Apply LoRA adapters
+# Apply LoRA adapters in CPU mode
 from peft import PeftModel
 
 lora_model_name = "oskaralf/lora_model"  # Replace with your LoRA model path
 model = PeftModel.from_pretrained(base_model, lora_model_name)
 model.to(device)
 
-# Prepare for inference
+# Prepare for inference in CPU mode
 FastLanguageModel.for_inference(model)
 
 # Gradio interface
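
The hunk ends at the "# Gradio interface" comment, so the interface code itself is not part of this diff. For context, a minimal sketch of how the rest of app.py could wire the CPU-loaded model into Gradio: the respond function, the max_new_tokens value, and the plain text-in/text-out layout are all assumptions for illustration, not the commit's actual UI code.

import gradio as gr

def respond(prompt):
    # Hypothetical handler -- the actual interface is truncated in this diff.
    # Tokenize the prompt and keep the tensors on the chosen device (CPU here).
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # CPU generation with a 3B model is slow, so cap the output length.
    outputs = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

demo = gr.Interface(fn=respond, inputs="text", outputs="text")
demo.launch()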