Sample Use
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import torch
from tqdm import tqdm
import json
import re

# Hugging Face token
HF_TOKEN = "your_token"
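# Note: hard-coding tokens is risky. The token can instead be read from
# an environment variable, e.g.:
#   import os
#   HF_TOKEN = os.environ["HF_TOKEN"]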
# Load the model with 4-bit quantization
model_id = "guTakuto/llm-jp-3-13b-finetune"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
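# NF4 ("normal float 4") quantization stores the 13B weights in roughly a
# quarter of their bfloat16 footprint; matrix multiplications are still
# computed in bfloat16, as set by bnb_4bit_compute_dtype.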
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    use_auth_token=HF_TOKEN,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
    use_auth_token=HF_TOKEN,
)
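# With device_map="auto", accelerate decides where each layer is placed;
# the resulting mapping can be inspected via model.hf_device_map.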
# Load the task data; a record may span multiple lines, so lines are
# accumulated until a closing brace completes one JSON object
datasets = []
with open("elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""
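# Each record is expected to provide at least these fields
# (illustrative example; actual values differ):
#   {"task_id": 0, "input": "..."}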
# Run inference on each task
results = []
for data in tqdm(datasets):
    inputs = data["input"]
    # Prompt template used during fine-tuning (指示 = instruction, 回答 = answer)
    prompt = f"""### 指示
{inputs}
### 回答
"""
    tokenized_input = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
    attention_mask = torch.ones_like(tokenized_input)
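    # The all-ones mask marks every prompt token as real input; passing it
    # explicitly also silences the warning transformers emits when
    # pad_token_id is set to eos_token_id.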
    with torch.no_grad():
        regenerate_count = 0  # counts regeneration attempts
        max_attempts = 5      # maximum number of regeneration attempts
        while True:  # loop until the output is non-empty
            outputs = model.generate(
                tokenized_input,
                attention_mask=attention_mask,
                max_new_tokens=200,
                do_sample=False,
                repetition_penalty=1.2,
                pad_token_id=tokenizer.eos_token_id,
            )[0]
            # Decode only the newly generated tokens, skipping the prompt
            output = tokenizer.decode(outputs[tokenized_input.size(1):], skip_special_tokens=True)
            print(f"output: {output}")
            # Exit the loop once the output is non-empty
            if output.strip():
                break
            # Count the retry and report it; note that with do_sample=False
            # decoding is deterministic, so a retry will generally reproduce
            # the same output
            regenerate_count += 1
            print(f"Output is empty. Regenerating... (Attempt {regenerate_count})")
            # Give up once the maximum number of attempts is reached
            if regenerate_count >= max_attempts:
                print("Maximum regeneration attempts reached. Exiting loop.")
                break
    results.append({"task_id": data["task_id"], "input": inputs, "output": output})
# Write the results to a JSONL file (assumes the jsonl_output/ directory exists)
jsonl_id = re.sub(".*/", "", model_id)
with open(f"jsonl_output/{jsonl_id}-outputs.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')
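Each line of the generated file holds one JSON object, so the results can be read back for a quick sanity check (a minimal sketch, reusing jsonl_id from above):

# Read the generated outputs back in
with open(f"jsonl_output/{jsonl_id}-outputs.jsonl", encoding="utf-8") as f:
    loaded = [json.loads(line) for line in f]
print(len(loaded), "results")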
Model tree for guTakuto/llm-jp-3-13b-finetune
Base model: llm-jp/llm-jp-3-13b