import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
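# Quantization setup: load the base model in 4-bit NF4 with double quantization,
# computing in bfloat16 to keep memory use low during fine-tuning.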
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
# Load the model and tokenizer on the GPU (device_map="auto" handles placement)
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=nf4_config,
    device_map="auto",
    local_files_only=False,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=False)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print(model)
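# Dataset preparation for DPO: each example pairs a prompt with a preferred
# ("chosen") and a dispreferred ("rejected") response.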
from datasets import load_dataset
from trl import DPOTrainer
# Preference dataset: jondurbin/truthy-dpo-v0.1
def return_prompt_and_responses(samples):
    return {
        "prompt": [
            "Question: " + question + "\n\nAnswer: "
            for question in samples["prompt"]
        ],
        "chosen": samples["chosen"],      # preferred response
        "rejected": samples["rejected"],  # dispreferred response
    }
dataset = load_dataset(
    "jondurbin/truthy-dpo-v0.1",
    split="train",
    # data_dir="data/rl"
)
original_columns = dataset.column_names
# Dataset.map returns a new dataset, so reassign the result
dataset = dataset.map(
    return_prompt_and_responses,
    batched=True,
    remove_columns=original_columns,
)
model = prepare_model_for_kbit_training(model)
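# LoRA adapter: rank-128 adapters on all attention and MLP projections plus lm_head,
# so only a small fraction of parameters is trained on top of the 4-bit base model.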
peft_config = LoraConfig(
    r=128,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "up_proj", "gate_proj", "down_proj", "lm_head"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
output_dir = "./odp"
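# Training hyperparameters: small per-device batch with gradient checkpointing and
# the memory-light Adafactor optimizer, to fit the 8B model on a single GPU.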
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    optim="adafactor",
    overwrite_output_dir=True,
    save_steps=100,
    num_train_epochs=1,
    learning_rate=2e-4,
    bf16=True,
    save_total_limit=3,
    logging_steps=10,
    output_dir=output_dir,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
)
# Note: this keyword layout (beta, tokenizer, max_length passed directly) matches
# older TRL releases; newer versions move these settings into DPOConfig.
dpo_trainer = DPOTrainer(
    model,
    # ref_model omitted: with a PEFT adapter, TRL uses the frozen base model
    # (adapter disabled) as the implicit reference model
    args=training_args,
    peft_config=peft_config,
    beta=0.1,
    train_dataset=dataset,
    tokenizer=tokenizer,
    max_prompt_length=1024,
    max_length=2048,
)
dpo_trainer.train()
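# Optional follow-up (added sketch, not in the original script): persist the trained
# LoRA adapter and tokenizer to output_dir so they can be reloaded or merged later.
dpo_trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)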