sreyanghosh committed on
Commit
c76e7c3
·
1 Parent(s): b7359a5

update/tried 8bit cpu quant route

Browse files
Files changed (2) hide show
  1. app.py +5 -1
  2. requirements.txt +1 -2
app.py CHANGED
@@ -7,7 +7,11 @@ def load_model():
7
  base_model_name = "unsloth/llama-3.2-1b-instruct-bnb-4bit" # Replace with your base model name
8
  lora_model_name = "sreyanghosh/lora_model" # Replace with your LoRA model path
9
  tokenizer = AutoTokenizer.from_pretrained(base_model_name)
10
- model = AutoModelForCausalLM.from_pretrained(base_model_name, device_map="auto")
 
 
 
 
11
  model = PeftModel.from_pretrained(model, lora_model_name)
12
  model.eval()
13
  return tokenizer, model
 
7
  base_model_name = "unsloth/llama-3.2-1b-instruct-bnb-4bit" # Replace with your base model name
8
  lora_model_name = "sreyanghosh/lora_model" # Replace with your LoRA model path
9
  tokenizer = AutoTokenizer.from_pretrained(base_model_name)
10
+ model = AutoModelForCausalLM.from_pretrained(
11
+ base_model_name,
12
+ device_map="auto" if torch.cuda.is_available() else None,
13
+ load_in_8bit=not torch.cuda.is_available(),
14
+ )
15
  model = PeftModel.from_pretrained(model, lora_model_name)
16
  model.eval()
17
  return tokenizer, model
requirements.txt CHANGED
@@ -3,5 +3,4 @@ gradio
3
  transformers
4
  peft
5
  torch
6
- # unsloth
7
- bitsandbytes
 
3
  transformers
4
  peft
5
  torch
6
+ bitsandbytes>=0.37