Sample Use
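The script below loads the fine-tuned model with 4-bit quantization, runs it over the ELYZA-tasks-100-TV task file, and writes one prediction per task to a JSONL file.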

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import torch
from tqdm import tqdm
import json
import re

# Hugging Face access token (replace with your own)
HF_TOKEN = "your_token"

# Load the model with 4-bit NF4 quantization
model_id = "guTakuto/llm-jp-3-13b-finetune"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    use_auth_token=HF_TOKEN,
)
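# Rough estimate: 13B parameters at 4 bits is about 6.5 GB of weights before
# overhead, so a single 16 GB GPU should suffice (an approximation, not a
# measured figure)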
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
    use_auth_token=HF_TOKEN,
)
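# Note: trust_remote_code=True runs code shipped with the model repository,
# so keep it enabled only for repositories you trust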

# Load the task data; a record may span multiple lines, so accumulate
# lines until the JSON object closes with "}"
datasets = []
with open("elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""

# Run inference on each task with the model
results = []
for data in tqdm(datasets):
    inputs = data["input"]
    # Build the instruction-format prompt; continuation lines start at
    # column 0 so no stray indentation leaks into the prompt text
    prompt = f"""### 指示
{inputs}
### 回答
"""
    tokenized_input = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
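    # An explicit all-ones attention mask (there is no padding here) avoids
    # the warning transformers emits when pad_token_id is set to eos_token_id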
    attention_mask = torch.ones_like(tokenized_input)
    with torch.no_grad():
        regenerate_count = 0  # number of regeneration attempts so far
        max_attempts = 5      # maximum number of regeneration attempts
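        # Note: with do_sample=False generation is deterministic, so a retry
        # reproduces the same output; set do_sample=True if regeneration is
        # meant to yield a different answer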
        while True:  # retry until we get a non-empty answer
            outputs = model.generate(
                tokenized_input,
                attention_mask=attention_mask,
                max_new_tokens=200,
                do_sample=False,
                repetition_penalty=1.2,
                pad_token_id=tokenizer.eos_token_id,
            )[0]
            # Decode only the newly generated tokens, skipping the prompt
            output = tokenizer.decode(outputs[tokenized_input.size(1):], skip_special_tokens=True)
            print(f"output: {output}")
            # Exit the loop once the output is non-empty
            if output.strip():
                break
            # Count the regeneration attempt and report it
            regenerate_count += 1
            print(f"Output is empty. Regenerating... (Attempt {regenerate_count})")
            # Give up after the maximum number of attempts
            if regenerate_count >= max_attempts:
                print("Maximum regeneration attempts reached. Exiting loop.")
                break
    results.append({"task_id": data["task_id"], "input": inputs, "output": output})
    
# Write the results to a JSONL file
jsonl_id = re.sub(".*/", "", model_id)  # strip the organization prefix from the model id
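# The output path below assumes a jsonl_output/ directory exists; create it
# if necessary
import os
os.makedirs("jsonl_output", exist_ok=True)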
with open(f"jsonl_output/{jsonl_id}-outputs.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')
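
A quick way to sanity-check the result is to parse the file back in; each line should decode to one JSON object with "task_id", "input", and "output" keys (a minimal sketch, reusing jsonl_id from the script above):

with open(f"jsonl_output/{jsonl_id}-outputs.jsonl", encoding="utf-8") as f:
    records = [json.loads(line) for line in f]
print(f"{len(records)} results written")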