import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import DPOTrainer

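# QLoRA-style 4-bit quantization: NF4 weight format, nested (double) quantization,
# and bfloat16 as the compute dtype for the de-quantized matmuls.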
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

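# Load the quantized base model and its tokenizer; device_map="auto" places the
# weights on the available GPU(s), so no manual .to(device) call is needed.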
device = "cuda:0" |
|
model_id = "meta-llama/Meta-Llama-3-8B-Instruct" |
|
model = AutoModelForCausalLM.from_pretrained(model_id,quantization_config=nf4_config,device_map="auto",local_files_only=False,trust_remote_code=True) |
|
tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=False) |
|
if tokenizer.pad_token is None: |
|
tokenizer.pad_token = tokenizer.eos_token |
|
print(model) |
|
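
# Format each example into the prompt/chosen/rejected fields DPOTrainer expects:
# the prompt gets a simple Question/Answer template, preference pairs pass through.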
def return_prompt_and_responses(samples):
    return {
        "prompt": [
            "Question: " + question + "\n\nAnswer: "
            for question in samples["prompt"]
        ],
        "chosen": samples["chosen"],
        "rejected": samples["rejected"],
    }


dataset = load_dataset(
    "jondurbin/truthy-dpo-v0.1",
    split="train",
)
original_columns = dataset.column_names

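# Dataset.map is not in-place: assign the result back so the templated prompts
# (and dropped original columns) are what the trainer actually sees.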
dataset = dataset.map(
    return_prompt_and_responses,
    batched=True,
    remove_columns=original_columns,
)

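# Prepare the 4-bit model for training: non-quantized parameters (e.g. layer norms)
# are cast to fp32 and input gradients are enabled so gradient checkpointing works
# with the frozen quantized base weights.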
model = prepare_model_for_kbit_training(model)

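# LoRA adapters on all attention and MLP projections plus lm_head; only these
# low-rank adapter weights are trained, the quantized base model stays frozen.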
peft_config = LoraConfig(
    r=128,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "up_proj", "gate_proj", "down_proj", "lm_head"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

output_dir = "./odp" |
|
training_args = TrainingArguments( |
|
per_device_train_batch_size=1, |
|
gradient_accumulation_steps=1, |
|
gradient_checkpointing =True, |
|
max_grad_norm= 0.3, |
|
optim='adafactor', |
|
overwrite_output_dir=True,save_steps=100, |
|
num_train_epochs=1, |
|
learning_rate=2e-4, |
|
bf16=True, |
|
save_total_limit=3, |
|
logging_steps=10, |
|
output_dir=output_dir, |
|
lr_scheduler_type="cosine", |
|
warmup_ratio=0.05, |
|
) |
|
|
|
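# DPO training: beta weights the implicit KL penalty against the reference model,
# which trl builds from the frozen base weights when a peft_config is supplied.
# Passing beta/max_length/tokenizer directly matches older trl releases; newer
# versions expect beta and the length limits in a DPOConfig instead.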
dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    peft_config=peft_config,
    beta=0.1,
    train_dataset=dataset,
    tokenizer=tokenizer,
    max_prompt_length=1024,
    max_length=2048,
)

dpo_trainer.train()
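
# After training, the learned LoRA adapter can also be saved explicitly (in addition
# to the periodic checkpoints written by save_steps), e.g.:
# dpo_trainer.save_model(output_dir)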