Uploaded model

  • Developed by: takeruh
  • License: apache-2.0
  • Finetuned from model: unsloth/gemma-2-9b

This Gemma 2 model was trained 2x faster with Unsloth and Hugging Face's TRL library.

# 必要なライブラリを読み込み
from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
from tqdm import tqdm
import re


# Base model and the trained LoRA adapter (both given as Hugging Face repo IDs).
model_id = "unsloth/gemma-2-9b"
adapter_id = "takeruh/gemma-2-9b-it_lora"


# Hugging Face access token used when fetching the adapter.
# Leave it empty to fall back to the library's default credential lookup.
HF_TOKEN = "" #@param {type:"string"}

# Load settings for the base model: bfloat16 weights, no 4-bit quantization.
dtype = torch.bfloat16
load_in_4bit = False


# Load the base model and its tokenizer through unsloth's FastLanguageModel.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally — confirm it is actually required for this base model.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)

# Apply the LoRA adapter on top of the base model.
# An empty HF_TOKEN is converted to None so that "no token supplied" is
# expressed the documented way instead of as an empty string.
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN or None)

# Load the task data.
# Upload the data file beforehand (adjust the path below if it lives elsewhere).
datasets = []
# Read explicitly as UTF-8 so decoding does not depend on the platform's
# default locale; this matches the UTF-8 encoding used when results are saved.
with open("/workspace/elyza-tasks-100-TV_0.jsonl", "r", encoding="utf-8") as f:
    # A record may be spread over several physical lines, so lines are
    # accumulated until the buffered text ends with "}" and is then parsed.
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""



# Free memory before inference.
# Run the garbage collector first so that unreachable objects are released,
# then ask PyTorch to hand its unused cached CUDA memory back; doing it in the
# opposite order leaves memory freed by the collection sitting in the cache.
import gc

gc.collect()
torch.cuda.empty_cache()


# Run the tasks with the fine-tuned model.
from tqdm import tqdm

# Switch the model into unsloth's inference mode.
FastLanguageModel.for_inference(model)

# Prompts in a batch have different lengths. Pad on the left so every prompt
# ends at the same position and generation continues directly after it.
tokenizer.padding_side = "left"

batch_size = 8
results = []
for i in tqdm(range(0, len(datasets), batch_size)):
    batch_data = datasets[i:i+batch_size]
    prompts = [f"### 指示\n{dt['input']}\n### 回答\n" for dt in batch_data]

    batch_inputs = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True,
        truncation=True
    ).to(model.device)
    # Greedy decoding (do_sample=False) with a repetition penalty.
    batch_outputs = model.generate(
        **batch_inputs,
        max_new_tokens=350,
        use_cache=True,
        do_sample=False,
        repetition_penalty=1.5
    )

    # Each output row is the (padded) prompt followed by the generated tokens.
    # Cutting at the prompt length keeps only the answer, which stays correct
    # even if the model emits the "### 回答" marker again in its own output.
    # Unlike splitting the decoded text on the marker, this does not keep the
    # newline that terminates the prompt at the start of the prediction.
    prompt_length = batch_inputs["input_ids"].shape[1]
    for dt, output_ids in zip(batch_data, batch_outputs):
        prediction = tokenizer.decode(output_ids[prompt_length:], skip_special_tokens=True)
        result = {"task_id": dt["task_id"], "input": dt["input"], "output": prediction}
        results.append(result)
        # Print each result without breaking the progress bar.
        tqdm.write(str(result))



# Save the results as JSONL: one JSON object per line.

# The file name is derived from adapter_id (the part after the last "/"),
# but any file name would work.
json_file_id = re.sub(".*/", "", adapter_id)
output_path = f"/workspace/{json_file_id}_output2.jsonl"
with open(output_path, mode="w", encoding="utf-8") as out_file:
    # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaped.
    out_file.writelines(
        json.dumps(record, ensure_ascii=False) + "\n" for record in results
    )
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no pipeline_tag.

Model tree for takeruh/gemma-2-9b-it_lora

Base model

unsloth/gemma-2-9b
Finetuned
(230)
this model