Uploaded model

This llama model was trained 2x faster with Unsloth and Hugging Face's TRL library.

Usage

This script has been tested on Google Colab. Modify the INITIAL SETTING section to match your environment; the inference results will be saved to the path specified in OUTPUTPATH.

# Mount Google Drive so the task file and output path are accessible
from google.colab import drive
drive.mount('/content/drive')

##### INITIAL SETTING #####
HF_TOKEN = "your_hugging_face_token"  # replace with your Hugging Face access token
TASKPATH = "/content/drive/MyDrive/elyza-tasks-100-TV_0.jsonl"
OUTPUTPATH = "/content/drive/MyDrive/inference_output.jsonl"
##### ##### ##### ##### #####

!pip install unsloth
# Reinstall the latest Unsloth from GitHub for Colab compatibility
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install -U torch
!pip install -U peft

from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
from tqdm import tqdm

model_id = "llm-jp/llm-jp-3-13b"
adapter_id = "kenakayama/llm-jp-3-13b-1216_lora"

dtype = None         # None lets Unsloth auto-detect (float16 / bfloat16)
load_in_4bit = True  # 4-bit quantization so the 13B model fits in Colab GPU memory

# Load the base model in 4-bit with Unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)

# Attach the LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)
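
# Optional sanity check: peft's PeftModel can report how many adapter
# parameters were attached (standard peft API).
# model.print_trainable_parameters()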

# Load the task file; JSON objects may span multiple lines, so accumulate
# lines until a closing brace completes one record.
datasets = []
with open(TASKPATH, "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""


# Switch the model into Unsloth's optimized inference mode
FastLanguageModel.for_inference(model)

results = []
for dt in tqdm(datasets):
    task_input = dt["input"]

    # The prompt asks the model to strictly follow the answer format given in
    # the instruction ("指示" = instruction, "回答" = answer).
    prompt = f"""次の指示内で決められた回答形式に厳密に沿った回答をしてください。### 指示\n{task_input}\n### 回答\n"""

    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    # Greedy decoding with a repetition penalty; keep only the text after the
    # final "\n### 回答" marker.
    outputs = model.generate(**inputs, max_new_tokens=1024, use_cache=True, do_sample=False, repetition_penalty=1.2)
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]

    results.append({"task_id": dt["task_id"], "input": task_input, "output": prediction})

# Write results as JSONL: one JSON object per line
with open(OUTPUTPATH, 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')
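
Each line of the output file is a standalone JSON object with the keys the script writes, for example (hypothetical values):

{"task_id": 0, "input": "...", "output": "..."}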