# run as a module using: python3 -m scripts.finetune
# Using: https://huggingface.co/blog/mlabonne/sft-llama3
import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported
from data.fine_tune_dataset import load_data
def finetune(model="unsloth/Meta-Llama-3.1-8B-bnb-4bit", dataset="mlabonne/FineTome-100k"):
    hf_token = ""

    # Loading the model and restricting context window
    max_seq_length = 2048
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model,
        max_seq_length=max_seq_length,
        load_in_4bit=True,
        dtype=None,
    )
    # Loading prepared dataset
    dataset = load_data(dataset, tokenizer)
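    # load_data (in data/fine_tune_dataset.py, not shown here) is assumed to follow the
    # blog's preprocessing step: apply the Llama-3.1 chat template via get_chat_template
    # (imported above) and flatten the ShareGPT-style "conversations" column into the
    # single "text" column that dataset_text_field="text" below expects, roughly:
    #   tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")
    #   data = standardize_sharegpt(load_dataset(name, split="train"))
    #   data = data.map(lambda b: {"text": [tokenizer.apply_chat_template(c, tokenize=False)
    #                                        for c in b["conversations"]]}, batched=True)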
    # Wrapping the model with LoRA adapters for fine tuning - only ~42 million of the 8 billion parameters are trained
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,  # LoRA (low-rank adaptation) rank: sets the adapter matrix size; higher ranks raise memory and compute cost
        lora_alpha=16,  # scaling factor for the adapter updates
        lora_dropout=0,  # disabled for a speedup
        target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],  # layers LoRA targets
        use_rslora=True,  # rank-stabilised LoRA
        use_gradient_checkpointing="unsloth",
    )
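    # Optional sanity check (not in the original script): PEFT-wrapped models expose
    # print_trainable_parameters(), which should report roughly 42M trainable parameters
    # out of ~8B total for the r=16 configuration above.
    # model.print_trainable_parameters()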
    # Saving the untrained model; save_method can be "lora" to save only the adapters, or "merged_16bit"/"merged_4bit" for a merged model
    model.save_pretrained_merged("models/PreFineLlama-3.1-8B", tokenizer, save_method="merged_16bit")  # save locally to the models directory
    model.push_to_hub_merged("thebigoed/PreFineLlama-3.1-8B", tokenizer, token=hf_token, save_method="merged_16bit")
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=True,
        args=TrainingArguments(
            learning_rate=3e-4,  # too low = slow training and risk of local minima, too high = unstable training
            lr_scheduler_type="linear",  # decays the learning rate over training (linear and cosine are the most popular)
            per_device_train_batch_size=8,
            gradient_accumulation_steps=2,  # effective batch size = 8 * 2 = 16
            num_train_epochs=1,
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.01,
            warmup_steps=10,
            output_dir="output",
            seed=0,
        ),
    )
    trainer.train()

    # Saving the fine-tuned model; save_method can be "lora" to save only the adapters, or "merged_16bit"/"merged_4bit" for a merged model
    model.save_pretrained_merged("models/FineLlama-3.1-8B", tokenizer, save_method="merged_16bit")  # save locally to the models directory
    model.push_to_hub_merged("thebigoed/FineLlama-3.1-8B", tokenizer, token=hf_token, save_method="merged_16bit")

    # Uncomment to also push GGUF quantised exports
    # quant_methods = ["q2_k", "q3_k_m", "q4_k_m", "q5_k_m", "q6_k", "q8_0"]
    # for quant in quant_methods:
    #     model.push_to_hub_gguf("", tokenizer, quant)
    return
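
def smoke_test(model_dir="models/FineLlama-3.1-8B", prompt="What is LoRA fine-tuning?"):
    # Illustrative sketch only (not part of the original script): reload the merged
    # checkpoint saved above and stream a short completion, which is presumably what the
    # otherwise-unused TextStreamer import is for. The prompt and max_new_tokens are
    # arbitrary placeholders, and a CUDA device is assumed (as for the 4-bit training above).
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_dir,
        max_seq_length=2048,
        load_in_4bit=True,
        dtype=None,
    )
    FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference mode
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    model.generate(**inputs, streamer=TextStreamer(tokenizer), max_new_tokens=128)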
if __name__ == "__main__":
    finetune()