Uploaded model
- Developed by: Ka3456
- License: apache-2.0
- Finetuned from model: llm-jp/llm-jp-3-13b

This Llama model was trained 2x faster with Unsloth and Hugging Face's TRL library.
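The card only states that the model was fine-tuned from llm-jp/llm-jp-3-13b with Unsloth and TRL; the training script itself is not included. Below is a minimal sketch of what such a setup could look like, assuming 4-bit QLoRA, an instruction dataset with `instruction`/`output` columns, and the same `### 指示` / `### 回答` prompt format used by the inference script further down. The dataset path, LoRA rank, and all hyperparameters are illustrative assumptions, not the author's actual settings.

```python
# Hypothetical training sketch (not the author's actual script): QLoRA
# fine-tuning of llm-jp/llm-jp-3-13b with Unsloth and TRL's SFTTrainer.
# Dataset path, LoRA rank, and all hyperparameters are assumptions.
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

max_seq_length = 512

# Load the base model in 4-bit and attach LoRA adapters
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="llm-jp/llm-jp-3-13b",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
)
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0.0,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

# Any instruction dataset with "instruction"/"output" columns (assumed)
dataset = load_dataset("json", data_files="train.jsonl", split="train")

def format_example(example):
    # Same "### 指示 / ### 回答" prompt format as the inference script below
    example["text"] = (
        f"### 指示\n{example['instruction']}\n### 回答\n{example['output']}"
        + tokenizer.eos_token
    )
    return example

dataset = dataset.map(format_example)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=TrainingArguments(
        output_dir="outputs",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-4,
        logging_steps=10,
        optim="adamw_8bit",
    ),
)
trainer.train()

# Push only the LoRA adapter (and the tokenizer) to the Hub
model.push_to_hub("Ka3456/practice5_lora", token="your-token")
tokenizer.push_to_hub("Ka3456/practice5_lora", token="your-token")
```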
The following script loads the model with 4-bit quantization, runs inference on the elyza-tasks-100-TV dataset, and writes the predictions to a JSONL file.

```python
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import torch
import json
from tqdm import tqdm

# Hugging Face token and model ID
HF_TOKEN = "your-token"  # fill in your Hugging Face token
model_id = "Ka3456/practice5_lora"

# Load the model with 4-bit NF4 quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    token=HF_TOKEN,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
    token=HF_TOKEN,
)

# Load the dataset; records may span multiple lines, so accumulate
# text until a closing brace completes one JSON object.
datasets = []
with open("elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""

# Run inference with the "### 指示" (instruction) / "### 回答" (answer)
# prompt format used at fine-tuning time.
results = []
for dt in tqdm(datasets):
    input_text = dt["input"]
    prompt = f"### 指示\n{input_text}\n### 回答\n"
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
        do_sample=False,
        repetition_penalty=1.2,
    )
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split("\n### 回答")[-1]
    results.append({"task_id": dt["task_id"], "input": input_text, "output": prediction})

# Save the inference results; replace "/" in the repo ID so it can be used as a filename.
output_file = f"{model_id.replace('/', '_')}_output.jsonl"
with open(output_file, "w", encoding="utf-8") as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write("\n")
print(f"Saved inference results to {output_file}.")
```