# Fine-tuning script (SFT / DPO) for TwinLlama-3.1-8B, built on unsloth + TRL.
import argparse
import os
from pathlib import Path

# unsloth's DPO patch must be applied BEFORE trl/transformers are imported,
# which forces the unconventional import order below (silenced via noqa: E402).
from unsloth import PatchDPOTrainer

PatchDPOTrainer()

from typing import Any, List, Literal, Optional  # noqa: E402

import torch  # noqa
from datasets import concatenate_datasets, load_dataset  # noqa: E402
from huggingface_hub import HfApi  # noqa: E402
from huggingface_hub.utils import RepositoryNotFoundError  # noqa: E402
from transformers import TextStreamer, TrainingArguments  # noqa: E402
from trl import DPOConfig, DPOTrainer, SFTTrainer  # noqa: E402
from unsloth import FastLanguageModel, is_bfloat16_supported  # noqa: E402
from unsloth.chat_templates import get_chat_template  # noqa: E402
# Alpaca-style prompt template: slot 0 takes the instruction, slot 1 the response
# (left empty at inference time so the model completes it).
alpaca_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{}
### Response:
{}"""
def load_model(
    model_name: str,
    max_seq_length: int,
    load_in_4bit: bool,
    lora_rank: int,
    lora_alpha: int,
    lora_dropout: float,
    target_modules: List[str],
    chat_template: str,
) -> tuple:
    """Load a pretrained model, attach LoRA adapters, and set the tokenizer's chat template.

    Returns a (model, tokenizer) pair ready for fine-tuning.
    """
    base_model, base_tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        load_in_4bit=load_in_4bit,
    )

    # Wrap the base model with trainable LoRA adapters on the requested modules.
    peft_model = FastLanguageModel.get_peft_model(
        base_model,
        r=lora_rank,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        target_modules=target_modules,
    )

    templated_tokenizer = get_chat_template(
        base_tokenizer,
        chat_template=chat_template,
    )

    return peft_model, templated_tokenizer
def finetune(
    finetuning_type: Literal["sft", "dpo"],
    model_name: str,
    output_dir: str,
    dataset_huggingface_workspace: str,
    max_seq_length: int = 2048,
    load_in_4bit: bool = False,
    lora_rank: int = 32,
    lora_alpha: int = 32,
    lora_dropout: float = 0.0,
    target_modules: Optional[List[str]] = None,
    chat_template: str = "chatml",
    learning_rate: float = 3e-4,
    num_train_epochs: int = 3,
    per_device_train_batch_size: int = 2,
    gradient_accumulation_steps: int = 8,
    beta: float = 0.5,  # Only for DPO
    is_dummy: bool = True,
) -> tuple:
    """Fine-tune `model_name` with LoRA adapters using either SFT or DPO.

    Args:
        finetuning_type: "sft" trains on instruction/output pairs; "dpo" trains on
            prompt/chosen/rejected preference triplets.
        model_name: Hugging Face model id (or local path) to start from.
        output_dir: Directory where trainer checkpoints are written.
        dataset_huggingface_workspace: Hub namespace hosting the llmtwin datasets.
        target_modules: LoRA target module names; when None, defaults to the standard
            Llama attention/MLP projections.
        beta: DPO temperature; ignored when finetuning_type == "sft".
        is_dummy: When True, trains a single epoch on at most 400 samples (smoke test).

    Returns:
        A (model, tokenizer) tuple after training.

    Raises:
        ValueError: If `finetuning_type` is neither "sft" nor "dpo".
    """
    if target_modules is None:
        # Built per call: a mutable default argument would be shared across invocations.
        target_modules = ["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"]

    model, tokenizer = load_model(
        model_name, max_seq_length, load_in_4bit, lora_rank, lora_alpha, lora_dropout, target_modules, chat_template
    )
    EOS_TOKEN = tokenizer.eos_token
    print(f"Setting EOS_TOKEN to {EOS_TOKEN}")  # noqa

    dummy_dataset_size = 400
    if is_dummy is True:
        num_train_epochs = 1
        print(f"Training in dummy mode. Setting num_train_epochs to '{num_train_epochs}'")  # noqa
        print(f"Training in dummy mode. Reducing dataset size to '{dummy_dataset_size}'.")  # noqa

    if finetuning_type == "sft":

        def format_samples_sft(examples):
            # Render each (instruction, output) pair through the Alpaca template and
            # append EOS so the model learns where to stop generating.
            text = []
            for instruction, output in zip(examples["instruction"], examples["output"], strict=False):
                message = alpaca_template.format(instruction, output) + EOS_TOKEN
                text.append(message)

            return {"text": text}

        dataset1 = load_dataset(f"{dataset_huggingface_workspace}/llmtwin", split="train")
        dataset2 = load_dataset("mlabonne/FineTome-Alpaca-100k", split="train[:10000]")
        dataset = concatenate_datasets([dataset1, dataset2])
        if is_dummy:
            # Cap at the dataset length: select() raises when asked for more rows
            # than the dataset contains.
            dataset = dataset.select(range(min(dummy_dataset_size, len(dataset))))
        print(f"Loaded dataset with {len(dataset)} samples.")  # noqa

        dataset = dataset.map(format_samples_sft, batched=True, remove_columns=dataset.column_names)
        dataset = dataset.train_test_split(test_size=0.05)

        print("Training dataset example:")  # noqa
        print(dataset["train"][0])  # noqa

        trainer = SFTTrainer(
            model=model,
            tokenizer=tokenizer,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            dataset_text_field="text",
            max_seq_length=max_seq_length,
            dataset_num_proc=2,
            packing=True,
            args=TrainingArguments(
                learning_rate=learning_rate,
                num_train_epochs=num_train_epochs,
                per_device_train_batch_size=per_device_train_batch_size,
                gradient_accumulation_steps=gradient_accumulation_steps,
                fp16=not is_bfloat16_supported(),
                bf16=is_bfloat16_supported(),
                logging_steps=1,
                optim="adamw_8bit",
                weight_decay=0.01,
                lr_scheduler_type="linear",
                per_device_eval_batch_size=per_device_train_batch_size,
                warmup_steps=10,
                output_dir=output_dir,
                report_to="comet_ml",
                seed=0,
            ),
        )
    elif finetuning_type == "dpo":
        PatchDPOTrainer()

        def format_samples_dpo(example):
            # Wrap the prompt in the Alpaca template (empty response slot) and append
            # EOS to both completions.
            example["prompt"] = alpaca_template.format(example["prompt"], "")
            example["chosen"] = example["chosen"] + EOS_TOKEN
            example["rejected"] = example["rejected"] + EOS_TOKEN

            return {"prompt": example["prompt"], "chosen": example["chosen"], "rejected": example["rejected"]}

        dataset = load_dataset(f"{dataset_huggingface_workspace}/llmtwin-dpo", split="train")
        if is_dummy:
            # Same length cap as the SFT branch; the DPO dataset may hold < 400 rows.
            dataset = dataset.select(range(min(dummy_dataset_size, len(dataset))))
        print(f"Loaded dataset with {len(dataset)} samples.")  # noqa

        dataset = dataset.map(format_samples_dpo)
        dataset = dataset.train_test_split(test_size=0.05)

        print("Training dataset example:")  # noqa
        print(dataset["train"][0])  # noqa

        trainer = DPOTrainer(
            model=model,
            ref_model=None,  # no explicit reference model; TRL derives it implicitly
            tokenizer=tokenizer,
            beta=beta,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            max_length=max_seq_length // 2,
            max_prompt_length=max_seq_length // 2,
            args=DPOConfig(
                learning_rate=learning_rate,
                num_train_epochs=num_train_epochs,
                per_device_train_batch_size=per_device_train_batch_size,
                gradient_accumulation_steps=gradient_accumulation_steps,
                fp16=not is_bfloat16_supported(),
                bf16=is_bfloat16_supported(),
                optim="adamw_8bit",
                weight_decay=0.01,
                lr_scheduler_type="linear",
                per_device_eval_batch_size=per_device_train_batch_size,
                warmup_steps=10,
                output_dir=output_dir,
                eval_steps=0.2,
                logging_steps=1,
                report_to="comet_ml",
                seed=0,
            ),
        )
    else:
        raise ValueError("Invalid finetuning_type. Choose 'sft' or 'dpo'.")

    trainer.train()

    return model, tokenizer
def inference(
    model: Any,
    tokenizer: Any,
    prompt: str = "Write a paragraph to introduce supervised fine-tuning.",
    max_new_tokens: int = 256,
) -> None:
    """Generate a completion for `prompt` and stream the tokens to stdout."""
    # Switch unsloth's adapters into (faster) inference mode.
    model = FastLanguageModel.for_inference(model)

    # Fill only the instruction slot; the model completes the empty response slot.
    formatted_prompt = alpaca_template.format(prompt, "")
    model_inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")

    streamer = TextStreamer(tokenizer)
    _ = model.generate(**model_inputs, streamer=streamer, max_new_tokens=max_new_tokens, use_cache=True)
def save_model(model: Any, tokenizer: Any, output_dir: str, push_to_hub: bool = False, repo_id: Optional[str] = None):
    """Save the merged 16-bit model locally and optionally push it to the Hugging Face Hub.

    The push only happens when both `push_to_hub` is True and a `repo_id` is given.
    """
    model.save_pretrained_merged(output_dir, tokenizer, save_method="merged_16bit")

    should_push = push_to_hub and repo_id
    if should_push:
        print(f"Saving model to '{repo_id}'")  # noqa
        model.push_to_hub_merged(repo_id, tokenizer, save_method="merged_16bit")
def check_if_huggingface_model_exists(model_id: str, default_value: str = "mlabonne/TwinLlama-3.1-8B") -> str:
    """Return `model_id` if it exists on the Hugging Face Hub, otherwise `default_value`.

    Only RepositoryNotFoundError triggers the fallback; other Hub errors propagate.
    """
    hub_api = HfApi()

    try:
        hub_api.model_info(model_id)
    except RepositoryNotFoundError:
        print(f"Model '{model_id}' does not exist.")  # noqa
        model_id = default_value
        print(f"Defaulting to '{model_id}'")  # noqa
        print("Train your own 'TwinLlama-3.1-8B' to avoid this behavior.")  # noqa

    return model_id
if __name__ == "__main__": | |
parser = argparse.ArgumentParser() | |
parser.add_argument("--num_train_epochs", type=int, default=3) | |
parser.add_argument("--per_device_train_batch_size", type=int, default=2) | |
parser.add_argument("--learning_rate", type=float, default=3e-4) | |
parser.add_argument("--dataset_huggingface_workspace", type=str, default="mlabonne") | |
parser.add_argument("--model_output_huggingface_workspace", type=str, default="mlabonne") | |
parser.add_argument("--is_dummy", type=bool, default=False, help="Flag to reduce the dataset size for testing") | |
parser.add_argument( | |
"--finetuning_type", | |
type=str, | |
choices=["sft", "dpo"], | |
default="sft", | |
help="Parameter to choose the finetuning stage.", | |
) | |
parser.add_argument("--output_data_dir", type=str, default=os.environ["SM_OUTPUT_DATA_DIR"]) | |
parser.add_argument("--model_dir", type=str, default=os.environ["SM_MODEL_DIR"]) | |
parser.add_argument("--n_gpus", type=str, default=os.environ["SM_NUM_GPUS"]) | |
args = parser.parse_args() | |
print(f"Num training epochs: '{args.num_train_epochs}'") # noqa | |
print(f"Per device train batch size: '{args.per_device_train_batch_size}'") # noqa | |
print(f"Learning rate: {args.learning_rate}") # noqa | |
print(f"Datasets will be loaded from Hugging Face workspace: '{args.dataset_huggingface_workspace}'") # noqa | |
print(f"Models will be saved to Hugging Face workspace: '{args.model_output_huggingface_workspace}'") # noqa | |
print(f"Training in dummy mode? '{args.is_dummy}'") # noqa | |
print(f"Finetuning type: '{args.finetuning_type}'") # noqa | |
print(f"Output data dir: '{args.output_data_dir}'") # noqa | |
print(f"Model dir: '{args.model_dir}'") # noqa | |
print(f"Number of GPUs: '{args.n_gpus}'") # noqa | |
if args.finetuning_type == "sft": | |
print("Starting SFT training...") # noqa | |
base_model_name = "meta-llama/Meta-Llama-3.1-8B" | |
print(f"Training from base model '{base_model_name}'") # noqa | |
output_dir_sft = Path(args.model_dir) / "output_sft" | |
model, tokenizer = finetune( | |
finetuning_type="sft", | |
model_name=base_model_name, | |
output_dir=str(output_dir_sft), | |
dataset_huggingface_workspace=args.dataset_huggingface_workspace, | |
num_train_epochs=args.num_train_epochs, | |
per_device_train_batch_size=args.per_device_train_batch_size, | |
learning_rate=args.learning_rate, | |
) | |
inference(model, tokenizer) | |
sft_output_model_repo_id = f"{args.model_output_huggingface_workspace}/TwinLlama-3.1-8B" | |
save_model(model, tokenizer, "model_sft", push_to_hub=True, repo_id=sft_output_model_repo_id) | |
elif args.finetuning_type == "dpo": | |
print("Starting DPO training...") # noqa | |
sft_base_model_repo_id = f"{args.model_output_huggingface_workspace}/TwinLlama-3.1-8B" | |
sft_base_model_repo_id = check_if_huggingface_model_exists(sft_base_model_repo_id) | |
print(f"Training from base model '{sft_base_model_repo_id}'") # noqa | |
output_dir_dpo = Path(args.model_dir) / "output_dpo" | |
model, tokenizer = finetune( | |
finetuning_type="dpo", | |
model_name=sft_base_model_repo_id, | |
output_dir=str(output_dir_dpo), | |
dataset_huggingface_workspace=args.dataset_huggingface_workspace, | |
num_train_epochs=1, | |
per_device_train_batch_size=args.per_device_train_batch_size, | |
learning_rate=2e-6, | |
is_dummy=args.is_dummy, | |
) | |
inference(model, tokenizer) | |
dpo_output_model_repo_id = f"{args.model_output_huggingface_workspace}/TwinLlama-3.1-8B-DPO" | |
save_model(model, tokenizer, "model_dpo", push_to_hub=True, repo_id=dpo_output_model_repo_id) | |