Uploaded model
- Developed by: kenakayama
- License: apache-2.0
- Finetuned from model: llm-jp/llm-jp-3-13b
- Training Dataset: ichikara-instruction (https://liat-aip.sakura.ne.jp/wp/llm%E3%81%AE%E3%81%9F%E3%82%81%E3%81%AE%E6%97%A5%E6%9C%AC%E8%AA%9E%E3%82%A4%E3%83%B3%E3%82%B9%E3%83%88%E3%83%A9%E3%82%AF%E3%82%B7%E3%83%A7%E3%83%B3%E3%83%87%E3%83%BC%E3%82%BF%E4%BD%9C%E6%88%90/)
This Llama-architecture model was trained 2x faster with Unsloth and Hugging Face's TRL library.
Usage
The following script has been tested on Google Colab. Modify the INITIAL SETTING section to match your environment. The inference results are saved, one JSON object per line, to the path specified in OUTPUTPATH.
# Mount Google Drive so the task file (TASKPATH) and output file (OUTPUTPATH) are accessible
from google.colab import drive
drive.mount('/content/drive')
##### INITIAL SETTING #####
HF_TOKEN = "<your Hugging Face token>"
TASKPATH = "/content/drive/MyDrive/elyza-tasks-100-TV_0.jsonl"
OUTPUTPATH = "/content/drive/MyDrive/inference_output.jsonl"
##### ##### ##### ##### #####
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install -U torch
!pip install -U peft
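# Optional: if the base model or adapter repository requires authentication,
# an explicit Hugging Face login can be used instead of passing the token later
# (a hedged alternative, not part of the original script):
# from huggingface_hub import login
# login(token=HF_TOKEN)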
from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
from tqdm import tqdm
import re
model_id = "llm-jp/llm-jp-3-13b"
adapter_id = "kenakayama/llm-jp-3-13b-1216_lora"
dtype = None          # let Unsloth auto-detect float16 / bfloat16 for the GPU
load_in_4bit = True   # load the 13B base model with 4-bit quantization to fit Colab GPU memory
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)
# Attach the LoRA adapter to the quantized base model
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)
# Read the evaluation tasks; each record is a JSON object that may span several
# lines, so lines are accumulated until the closing brace completes the object.
datasets = []
with open(TASKPATH, "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""
FastLanguageModel.for_inference(model)  # switch Unsloth into its faster inference mode
results = []
for dt in tqdm(datasets):
    input = dt["input"]
    # Japanese prompt: "Answer strictly following the response format specified in the instruction."
    prompt = f"""次の指示内で決められた回答形式に厳密に沿った回答をしてください。### 指示\n{input}\n### 回答\n"""
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    # Greedy decoding with a repetition penalty; raise max_new_tokens if answers get truncated
    outputs = model.generate(**inputs, max_new_tokens=1024, use_cache=True, do_sample=False, repetition_penalty=1.2)
    # Keep only the text generated after the final "### 回答" marker
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
    results.append({"task_id": dt["task_id"], "input": input, "output": prediction})
# Write the results as JSON Lines (one JSON object per line)
with open(OUTPUTPATH, 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')
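Each line of the output file is a single JSON object with task_id, input, and output fields, for example (values illustrative): {"task_id": 0, "input": "...", "output": "..."}.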