from datasets import load_dataset
from peft import LoraConfig, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTTrainer, SFTConfig
import torch
import wandb

cache_dir = "./../cache"
model_id = "microsoft/Phi-3-mini-4k-instruct"
new_model = "python-phi-3-mini-4k-instruct"
username = "ellipticaloranges"
device_map = {"": 0}  # place the whole model on GPU 0
hf_model_repo = username + "/" + new_model

## ------------------------ LoRA configs ----------------------------------------------------
lora_r = 16
lora_alpha = 16
lora_dropout = 0.05
target_modules = ["k_proj", "q_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj"]
## ------------------------------------------------------------------------------------------

# Load the instruction/code dataset.
dataset_name = "flytech/python-codes-25k"
dataset_split = "train"
dataset = load_dataset(dataset_name, split=dataset_split, cache_dir=cache_dir)
print(f"Dataset size: {len(dataset)}")

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    cache_dir=cache_dir,
    trust_remote_code=True,
    add_eos_token=True,
    use_fast=True,
)
# Use the unknown token as the padding token so padding never collides with the EOS token.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
# The Flash Attention build of Phi-3 raises a ValueError for batched generation with
# padding_side='right', so pad on the left before tokenizing.
tokenizer.padding_side = "left"


def create_message_column(row):
    """Convert a dataset row into a chat-style list of user/assistant messages."""
    messages = [
        {"role": "user", "content": f"{row['instruction']}"},
        {"role": "assistant", "content": f"{row['input']}\n{row['output']}"},
    ]
    return {"messages": messages}


def format_dataset_chatml(row):
    """Render the messages into a single training string using the model's chat template."""
    return {
        "text": tokenizer.apply_chat_template(
            row["messages"], add_generation_prompt=False, tokenize=False
        )
    }


dataset_chatml = dataset.map(create_message_column)
dataset_chatml = dataset_chatml.map(format_dataset_chatml)
dataset_chatml = dataset_chatml.train_test_split(test_size=0.05, seed=1234)
# print("Max Seq Length", max(len(tokenizer.encode(x["text"])) for x in dataset_chatml["train"]))

# Prefer bfloat16 + Flash Attention 2 on GPUs that support it, otherwise fall back to fp16 + SDPA.
if torch.cuda.is_bf16_supported():
    compute_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    compute_dtype = torch.float16
    attn_implementation = "sdpa"
print(f"Using {compute_dtype} with {attn_implementation} implementation")

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=compute_dtype,
    trust_remote_code=True,
    device_map=device_map,
    attn_implementation=attn_implementation,
    cache_dir=cache_dir,
)

args = SFTConfig(
    output_dir="./phi-3-mini-LoRA",
    eval_strategy="steps",
    do_eval=True,
    optim="adamw_torch",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=8,
    log_level="debug",
    save_strategy="epoch",
    logging_steps=10,
    learning_rate=1e-4,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    eval_steps=100,
    dataset_text_field="text",
    max_seq_length=512,
    num_train_epochs=3,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    report_to="wandb",
    seed=42,
)

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    task_type=TaskType.CAUSAL_LM,
    target_modules=target_modules,
)

wandb.init(project="Phi 3", name="python-phi-3-lora")

# SFTTrainer applies the LoRA adapter from `peft_config`, so the base model is passed in
# unmodified (calling `model.add_adapter(peft_config)` here as well would inject the
# adapter layers twice).
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_chatml["train"],
    eval_dataset=dataset_chatml["test"],
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=args,
)

trainer.train()

# Save the trained LoRA adapter to ./out/ (per-epoch checkpoints go to `output_dir`).
trainer.save_model("./out/")