import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
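# QLoRA-style quantization config: weights are stored in 4-bit NF4 with double
# quantization to reduce memory, while matrix multiplies run in bfloat16.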
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
# Load the model and tokenizer on the GPU
device = "cuda:0"  # not used directly below; device_map="auto" handles placement
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=nf4_config,
    device_map="auto",
    local_files_only=False,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=False)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print(model)
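# Optional sanity check (not in the original script): report the GPU memory
# the 4-bit model occupies after loading.
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")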
from datasets import load_dataset
from trl import DPOTrainer
# jondurbin/truthy-dpo-v0.1
def return_prompt_and_responses(samples):
    # Format each question into the prompt template expected by DPOTrainer.
    return {
        "prompt": [
            "Question: " + question + "\n\nAnswer: "
            for question in samples["prompt"]
        ],
        "chosen": samples["chosen"],      # preferred (better-rated) response
        "rejected": samples["rejected"],  # dispreferred (worse-rated) response
    }
dataset = load_dataset(
    "jondurbin/truthy-dpo-v0.1",
    split="train",
    # data_dir="data/rl"
)
original_columns = dataset.column_names
dataset = dataset.map(  # assign the result; Dataset.map is not in-place
    return_prompt_and_responses,
    batched=True,
    remove_columns=original_columns,
)
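# Optional (not in the original script): inspect one mapped example to confirm
# the prompt/chosen/rejected fields are formatted as DPOTrainer expects.
print({k: dataset[0][k][:80] for k in ("prompt", "chosen", "rejected")})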
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=128,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "up_proj", "gate_proj", "down_proj", "lm_head"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
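# Note: get_peft_model() is not called explicitly; DPOTrainer applies this LoRA
# config internally when it is passed via peft_config. r=128 across all
# attention/MLP projections plus lm_head is a comparatively large adapter.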
output_dir = "./odp"
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    optim="adafactor",
    overwrite_output_dir=True,
    save_steps=100,
    num_train_epochs=1,
    learning_rate=2e-4,
    bf16=True,
    save_total_limit=3,
    logging_steps=10,
    output_dir=output_dir,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
)
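# Gradient checkpointing and the Adafactor optimizer keep activation and
# optimizer-state memory low; with batch size 1 and no gradient accumulation
# the effective training batch size is 1.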
dpo_trainer = DPOTrainer(
    model,
    # model_ref is omitted: with peft_config set, TRL derives the reference
    # model from the frozen base weights (adapters disabled).
    args=training_args,
    peft_config=peft_config,
    beta=0.1,
    train_dataset=dataset,
    tokenizer=tokenizer,
    max_prompt_length=1024,
    max_length=2048,
)
dpo_trainer.train()
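# Optional follow-up (not in the original script): persist the trained LoRA
# adapter and tokenizer so they can be reloaded or merged into the base model later.
dpo_trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)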