import json
import os

from unsloth import FastLanguageModel  # import unsloth before trl so its patches apply
from datasets import Dataset
from trl import DPOTrainer, DPOConfig
import torch
# Model loading function
def load_model():
    print("Initializing model loading...")
    model_name = "outputs_sample_code/checkpoint-200"
    max_seq_length = 512
    # Load the SFT checkpoint in 4-bit; dtype=None lets Unsloth pick
    # bfloat16 or float16 based on the GPU.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name,
        dtype=None,
        load_in_4bit=True,
    )
    print("Model and tokenizer loaded successfully.")
    print(f"Model type: {type(model)}, Tokenizer type: {type(tokenizer)}")
    if hasattr(model, "config"):
        print("Setting max_seq_length in model.config")
        model.config.max_seq_length = max_seq_length
    else:
        print("Error: model.config does not exist!")
    # Attach LoRA adapters so only a small set of weights is trained.
    model = FastLanguageModel.get_peft_model(
        model,
        r=32,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
        max_seq_length=max_seq_length,
    )
    print("PEFT model configured.")
    return model, tokenizer
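
# Optional sanity check (an assumption, not in the original script): generate
# once from the loaded checkpoint to confirm it works before training starts.
# The prompt string is an arbitrary placeholder.
def smoke_test(model, tokenizer, prompt="こんにちは"):
    FastLanguageModel.for_inference(model)  # switch to Unsloth's inference mode
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=32)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
    FastLanguageModel.for_training(model)   # switch back before DPO training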
# Dataset loading function
def load_dataset():
    print("Loading dataset...")
    dataset_name = "cyberagent/chatbot-arena-ja-calm2-7b-chat-experimental"
    # Imported locally so the library function shadows this wrapper
    # only inside its own scope.
    from datasets import load_dataset
    dataset = load_dataset(dataset_name)
    # Map the raw columns onto the prompt/chosen/rejected triples
    # that DPOTrainer expects.
    formatted_data = []
    for item in dataset["train"]:
        formatted_data.append({
            "prompt": item.get("prompt", ""),
            "chosen": item.get("response_winner", ""),
            "rejected": item.get("response_loser", ""),
        })
    print(f"Formatted data: {len(formatted_data)} items")
    return Dataset.from_dict({
        "prompt": [item["prompt"] for item in formatted_data],
        "chosen": [item["chosen"] for item in formatted_data],
        "rejected": [item["rejected"] for item in formatted_data],
    })
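
# Optional helper (an assumption, not in the original script): hold out a
# small evaluation split so eval_dataset is not just the training data.
def split_dataset(dataset, test_size=0.05, seed=3407):
    split = dataset.train_test_split(test_size=test_size, seed=seed)
    return split["train"], split["test"]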
# DPO training function
def train_dpo(model, tokenizer, dataset):
    print("Configuring training arguments...")
    training_args = DPOConfig(
        output_dir="./dpo_trained_model_1216",
        overwrite_output_dir=True,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=128,  # effective batch: 8 * 128 = 1024 pairs per device
        per_device_eval_batch_size=8,
        learning_rate=1e-5,
        weight_decay=0.01,
        num_train_epochs=1,
        lr_scheduler_type="constant_with_warmup",
        warmup_steps=10,
        fp16=True,
        eval_strategy="steps",
        save_strategy="steps",
        save_steps=32,
        logging_steps=8,
        eval_steps=8,
        load_best_model_at_end=True,
        save_safetensors=False,
        save_only_model=True,
        remove_unused_columns=False,  # keep the prompt/chosen/rejected columns for DPO
    )
    print("Training arguments configured.")
    print("Initializing DPOTrainer...")
    # beta/max_length/loss_type are passed to the trainer here; recent TRL
    # versions expect these on DPOConfig instead.
    dpo_trainer = DPOTrainer(
        model=model,
        args=training_args,
        beta=0.3,
        train_dataset=dataset,
        eval_dataset=dataset,  # evaluates on the training set; no held-out split
        tokenizer=tokenizer,
        max_prompt_length=162,
        max_length=512,
        loss_type="sigmoid",
        label_smoothing=0.0,
    )
    print("DPOTrainer initialized.")
    print("Starting training...")
    # Workaround: wrap forward() so input_ids are always cast to torch.long,
    # avoiding dtype errors from the 4-bit quantized model.
    original_forward = model.forward
    def new_forward(*args, **kwargs):
        if "input_ids" in kwargs:
            kwargs["input_ids"] = kwargs["input_ids"].long()
        return original_forward(*args, **kwargs)
    model.forward = new_forward
    dpo_trainer.train()
    print("Training completed.")
if __name__ == "__main__":
    print("Loading model...")
    model, tokenizer = load_model()
    print("Loading dataset...")
    dataset = load_dataset()
    print("Starting DPO training...")
    train_dpo(model, tokenizer, dataset)
    print("Training complete. Model saved.")