Uploaded model
- Developed by: kanbac5
- License: apache-2.0
- Finetuned from model: llm-jp/llm-jp-3-13b
This llama model was trained 2x faster with Unsloth and Hugging Face's TRL library.
Sample Use
from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
import yaml
from tqdm import tqdm
import re
# Identifiers of the base model and of the LoRA adapter applied on top of it.
model_id = "llm-jp/llm-jp-3-13b"
adapter_id = "kanbac5/llm-jp-3-13b-itsample_llm2024_lora"

# The access token is read from the "token" key of a local YAML file.
with open("api_info.yaml", 'r', encoding="utf-8") as config_file:
    parameters = yaml.safe_load(config_file)
HF_TOKEN = parameters["token"]

# Loading options forwarded to FastLanguageModel.from_pretrained.
dtype = None
load_in_4bit = True
base_model_options = {
    "model_name": model_id,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
    "trust_remote_code": True,
}

# Load the base model and its tokenizer, then wrap the model with the adapter.
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)
# Read the evaluation tasks into `datasets` as a list of parsed JSON objects.
# The file is opened explicitly as UTF-8 so that decoding does not depend on
# the platform's default encoding.
datasets = []
with open("data/elyza-tasks-100-TV_0.jsonl", "r", encoding="utf-8") as f:
    item = ""
    for line in f:
        # Lines are stripped and concatenated so that a record may span
        # several physical lines.
        item += line.strip()
        # A record is considered complete as soon as the accumulated text
        # ends with "}"; it is then parsed and the buffer is reset.
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Generation settings are identical for every task, so build them once.
generation_options = {
    "max_new_tokens": 512,
    "use_cache": True,
    "do_sample": False,
    "repetition_penalty": 1.2,
}

# Generate one answer per task and collect {"task_id", "input", "output"} records.
results = []
for dt in tqdm(datasets):
    # Named `task_input` rather than `input` so the builtin is not shadowed.
    task_input = dt["input"]
    prompt = f"""### 指示\n{task_input}\n### 回答\n"""
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, **generation_options)
    # The decoded text contains the prompt as well; keep only what follows the
    # last '\n### 回答' marker.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    prediction = decoded.split('\n### 回答')[-1]
    results.append({"task_id": dt["task_id"], "input": task_input, "output": prediction})
# Derive the output file name from the adapter id by dropping everything up
# to and including the last "/".
json_file_id = re.sub(".*/", "", adapter_id)
output_path = f"{json_file_id}_output_1217.jsonl"

# Write one JSON object per line; ensure_ascii=False keeps non-ASCII text
# readable instead of emitting \u escapes.
with open(output_path, 'w', encoding='utf-8') as out_file:
    out_file.writelines(
        json.dumps(result, ensure_ascii=False) + '\n' for result in results
    )
Model tree for kanbac5/llm-jp-3-13b-itsample_llm2024_lora
Base model
llm-jp/llm-jp-3-13b