|
import torch |
|
from accelerate import Accelerator |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments |
|
from peft import LoraConfig |
|
from trl import is_xpu_available |
|
|
|
|
|
|
|
# Pin the entire model ("" = whole module tree) to this process's device:
# an XPU slot when Intel XPUs are available, otherwise the bare local rank
# (interpreted as a CUDA ordinal / CPU by transformers).
local_rank = Accelerator().local_process_index
if is_xpu_available():
    device_map = {"": f"xpu:{local_rank}"}
else:
    device_map = {"": local_rank}

# Dtype used for the non-quantized weights/activations at load time.
torch_dtype = torch.bfloat16

# QLoRA-style base-model quantization: load weights in 4-bit.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
|
|
|
# Hyper-parameters for the supervised fine-tuning run.
training_args = TrainingArguments(
    output_dir='./output1',
    num_train_epochs=3,
    max_steps=-1,  # -1: let num_train_epochs determine run length
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
    logging_steps=1,
    save_steps=200_000,  # very large interval: checkpoints are effectively rare
    save_total_limit=10,
    push_to_hub=False,
    hub_model_id=None,
    gradient_checkpointing=False,
    # kwargs only take effect if gradient_checkpointing is enabled later
    gradient_checkpointing_kwargs=dict(use_reentrant=False),
    fp16=False,
    # NOTE(review): weights are loaded in bfloat16 but bf16 mixed precision
    # is off here — confirm this is intentional.
    bf16=False,
)
|
|
|
# LoRA adapter setup: rank-16 adapters (alpha 32, no bias terms) injected
# into every attention projection of the causal-LM.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    bias="none",
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
)
|
|
|
# Base checkpoint to fine-tune.
model_name = 'mistralai/Mixtral-8x7B-Instruct-v0.1'

# Load the base model 4-bit-quantized, placed entirely on this process's
# device; remote code execution is disabled.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch_dtype,
    device_map=device_map,
    quantization_config=quantization_config,
    trust_remote_code=False,
)
|
|
|
# Fast tokenizer for the same checkpoint; reuse EOS as the padding token so
# batched training works, and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'
|
|
|
|
|
|
|
from datasets import load_dataset, DatasetDict, concatenate_datasets
import pandas as pd

# CSV shards that together form the corpus.
filenames = ['./select-1.csv', './select-2.csv', './select-3.csv']

# Carve an 80/20 train/validation split out of EACH csv (fixed seed for
# reproducibility), collecting the pieces per split.
split_datasets = {'train': [], 'validation': []}
for csv_path in filenames:
    shard = load_dataset('csv', data_files=csv_path, split='train')
    pieces = shard.train_test_split(test_size=0.2, seed=42)
    split_datasets['train'].append(pieces['train'])
    split_datasets['validation'].append(pieces['test'])

# Pool the per-file pieces into the final train / eval datasets.
train_dataset = concatenate_datasets(split_datasets['train'])
eval_dataset = concatenate_datasets(split_datasets['validation'])
|
|
|
|
|
|
|
from trl import SFTTrainer

# Wire everything into TRL's supervised fine-tuning trainer; the 'text'
# column of the CSVs is the training text, truncated/packed to 512 tokens.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=peft_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field='text',
    max_seq_length=512,
)

# Run training, then persist the final (adapter) weights.
trainer.train()
trainer.save_model('./output1')
|
|