Commit 4ba4563
Parent(s): 80150e9

Add load 8bit back

app.py CHANGED
@@ -3,17 +3,15 @@ import transformers as t
 import torch
 import peft
 
-checkpoint = 1200
-
 # Load your fine-tuned model and tokenizer
 tokenizer = t.AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-hf")
-model = t.AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-hf")
+model = t.AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-hf",load_in_8bit=True, torch_dtype=torch.float16)
 tokenizer.pad_token_id = 0
 
 config = peft.LoraConfig(r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"], lora_dropout=0.005, bias="none", task_type="CAUSAL_LM")
 model = peft.get_peft_model(model, config)
 
-peft.set_peft_model_state_dict(model, torch.load(f"
+peft.set_peft_model_state_dict(model, torch.load(f".weights/adapter_model.bin"))
 
 # Define a prediction function
 def generate_article(title):
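
Note on the change above: load_in_8bit=True requires the bitsandbytes package and a CUDA device. Below is a minimal sketch of the same load path, assuming a recent transformers release where the bare load_in_8bit argument is expressed through an explicit BitsAndBytesConfig. The repo id, LoRA hyperparameters, and adapter path are taken from the diff; device_map="auto" is an added assumption, not part of the commit.

import torch
import transformers as t
import peft

tokenizer = t.AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-hf")
tokenizer.pad_token_id = 0

# Equivalent to load_in_8bit=True; recent transformers versions prefer an
# explicit BitsAndBytesConfig passed via quantization_config.
quant_config = t.BitsAndBytesConfig(load_in_8bit=True)
model = t.AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-hf",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
    device_map="auto",  # assumption: shard/place layers automatically
)

# Recreate the LoRA adapter structure, then load the fine-tuned weights,
# mirroring the two steps in app.py.
config = peft.LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.005,
    bias="none",
    task_type="CAUSAL_LM",
)
model = peft.get_peft_model(model, config)
peft.set_peft_model_state_dict(model, torch.load(".weights/adapter_model.bin"))

If the adapter had instead been saved with save_pretrained, peft.PeftModel.from_pretrained(model, adapter_dir) would rebuild the adapter and load its weights in one step, replacing the get_peft_model / set_peft_model_state_dict pair.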