Code for Google Colab

''' python

!pip uninstall unsloth -y !pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" !pip install --upgrade torch !pip install --upgrade xformers !pip install ipywidgets --upgrade

#Install Flash Attention 2 for softcapping support

import torch if torch.cuda.get_device_capability()[0] >= 8: !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"

# Load llm-jp/llm-jp-3-13b with 4-bit quantization (QLoRA setup).
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from unsloth import FastLanguageModel
import torch

# Maximum sequence length; reused for the LoRA setup and the SFT trainer.
max_seq_length = 512
# Forwarded unchanged to from_pretrained; presumably None lets Unsloth choose
# the dtype automatically -- confirm against the Unsloth documentation.
dtype = None
# Load the base weights in 4-bit precision.
load_in_4bit = True

# Base model to fine-tune, and the name used for the fine-tuned outputs
# (new_model_id becomes the prefix of the prediction file written later).
model_id = "llm-jp/llm-jp-3-13b"
new_model_id = "llm-jp-3-13b-finetune-2"

# Load the base model and its tokenizer through Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)

# Prepare the model for SFT by wrapping it with LoRA adapters.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
    max_seq_length=max_seq_length,
)

# Hugging Face access token placeholder. The "#@param" marker must stay on the
# same line so that Colab renders it as a form field. It is not referenced by
# the rest of the code shown in this card.
HF_TOKEN = "YOUR_TOKEN_ID" #@param {type:"string"}

from datasets import load_dataset

# Load the instruction data from a local JSON file uploaded to the Colab runtime.
dataset = load_dataset("json", data_files="/content/ichikara-instruction-003-001-1.json")

# Prompt template used during training.
# The line breaks and the "### 回答" header spacing are written to match the
# prompt built at inference time ("### 指示\n{input}\n### 回答\n"), so the model
# sees the same layout when it is trained and when it generates.
prompt = """### 指示
{}
### 回答
{}"""

# End-of-sequence token of the tokenizer, appended to every training sample.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Fill the training prompt template with one dataset record.

    Args:
        examples: A record exposing a "text" field (the instruction) and an
            "output" field (the answer). It is passed to ``dataset.map``
            without ``batched=True``, so presumably a single record at a
            time -- confirm against the ``datasets`` documentation.

    Returns:
        A dict with one new field, "formatted_text", holding the filled-in
        prompt followed by the EOS token.
    """
    instruction = examples["text"]
    answer = examples["output"]
    text = prompt.format(instruction, answer) + EOS_TOKEN
    return {"formatted_text": text}

# Apply the prompt format to every record.
dataset = dataset.map(
    formatting_prompts_func,
    num_proc=4,  # number of parallel worker processes
)

# Bare expression kept from the notebook; it has no effect unless it is the
# last statement of a cell, where the notebook displays the dataset summary.
dataset

# Inspect one formatted sample.
print(dataset["train"]["formatted_text"][3])

# training_arguments: reference for the training settings
#
#   - output_dir:
#       directory where the model is saved after training
#   - per_device_train_batch_size:
#       training batch size per device
#   - per_device_eval_batch_size:
#       evaluation batch size per device
#   - gradient_accumulation_steps:
#       number of steps to accumulate before the gradients are applied
#   - optim:
#       optimizer setting
#   - num_train_epochs:
#       number of epochs
#   - eval_strategy:
#       evaluation strategy ("no"/"steps"/"epoch")
#   - eval_steps:
#       interval in steps between evaluations when eval_strategy is "steps"
#   - logging_strategy:
#       logging strategy
#   - logging_steps:
#       interval in steps between log outputs
#   - warmup_steps:
#       number of learning-rate warmup steps
#   - save_steps:
#       interval in steps between model saves
#   - save_total_limit:
#       number of checkpoints to keep
#   - max_steps:
#       maximum number of training steps
#   - learning_rate:
#       learning rate
#   - fp16:
#       whether to use 16-bit floating point (Exercise 8 is a good reference)
#   - bf16:
#       whether to use BFloat16
#   - group_by_length:
#       group batches by input sequence length (makes training more efficient)
#   - report_to:
#       where logs are sent ("wandb"/"tensorboard", etc.)
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Use bf16 when the hardware supports it and fall back to fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Hyperparameters for the supervised fine-tuning run.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    logging_steps=10,
    warmup_steps=10,
    save_steps=100,
    save_total_limit=2,
    max_steps=-1,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    group_by_length=True,
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# Trainer that fine-tunes the LoRA-wrapped model on the "formatted_text" field.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    max_seq_length=max_seq_length,
    dataset_text_field="formatted_text",
    packing=False,
    args=training_args,
)

#@title Run training
# The call must sit on its own line; when it shares a line with the "#@title"
# comment it is commented out and training never starts.
trainer_stats = trainer.train()

# Load the ELYZA-tasks-100-TV evaluation tasks.
import json

datasets = []
# Read as UTF-8 explicitly so the Japanese text decodes the same way regardless
# of the platform's default encoding.
with open("./elyza-tasks-100-TV_0.jsonl", "r", encoding="utf-8") as f:
    # A JSON object may span several physical lines, so lines are accumulated
    # until the buffer ends with "}" and only then parsed as one record.
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""

# Run the evaluation tasks with the fine-tuned model.
from tqdm import tqdm

# Switch the model to inference mode before generating.
FastLanguageModel.for_inference(model)

# Generate an answer for every evaluation task and collect the predictions.
results = []
for dt in tqdm(datasets):
    # Named task_input rather than input so the builtin input() stays usable
    # in later notebook cells.
    task_input = dt["input"]

    # Kept separate from the training template `prompt` so that the template
    # is not overwritten if the formatting cell is run again.
    inference_prompt = f"""### 指示\n{task_input}\n### 回答\n"""

    inputs = tokenizer([inference_prompt], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
        do_sample=False,
        repetition_penalty=1.2,
    )
    # The decoded text contains the prompt as well; keep only what follows the
    # last answer header.
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]

    results.append({"task_id": dt["task_id"], "input": task_input, "output": prediction})

# Save the predictions as JSONL: one JSON object per line, with non-ASCII
# characters written as-is.
with open(f"{new_model_id}_output.jsonl", "w", encoding="utf-8") as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write("\n")

Uploaded model

  • Developed by: Ayappi
  • License: apache-2.0
  • Finetuned from model : llm-jp/llm-jp-3-13b

This Llama model was trained 2x faster with Unsloth and Hugging Face's TRL library.

Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no pipeline_tag.

Model tree for Ayappi/llm-jp-3-13b-finetune-2

Finetuned
(1119)
this model