Usage for Transformers


import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Example: chat-style generation with the Hugging Face Transformers pipeline.
BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

# Load the weights as bfloat16 onto cuda:0 using the FlashAttention-2
# attention implementation.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="cuda:0",
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Reuse the EOS token for padding and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# User question, in Korean: "Explain installment payments."
instruction = "할부 결제 대해서 설명해줘"

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
)

messages = [
    {"role": "user", "content": instruction},
]

# Render the conversation to a plain string (not token ids) and append the
# generation prompt so the model answers as the assistant.
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# NOTE(review): the chat template may already emit the BOS token; if it does,
# add_special_tokens=True would prepend a second one. Confirm against this
# tokenizer's template before changing the flag.
outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=True,
    # Stop on either the tokenizer's EOS token or the "<|eot_id|>" token.
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ],
)

# Skip the first len(prompt) characters so only the completion is printed.
print(outputs[0]["generated_text"][len(prompt):])

Result

할부 요금은 물건을 살 때, 그 값을 일정 기간 동안 나눠서 지불하는 방식입니다. 예를 들어, 50만원짜리 제품을 10개월 할부로 구매한다면, 각 달마다 5만원씩 10개월이 동안 지불하게 됩니다. 이때, 할부 요금은 일정 기간 동안 이자 없이 물건을 사용할 수 있는 이점이 있지만, 동시에 연체료가 부과될 수 있으며, 채무가 발생하게 됩니다. 따라서, 할부를 사용할 때는 자신의 재정 상태와 구매할 물건을 잘 고려해야 합니다.

Usage for vLLM

from vllm import LLM, SamplingParams
from transformers import AutoTokenizer, pipeline

# Example: chat-style generation with vLLM.
BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

llm = LLM(model=BASE_MODEL)

# The tokenizer is used here to render the chat template and to look up the
# stop-token ids.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Reuse the EOS token for padding and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# User question, in Korean: "Tell me about card installment payments."
instruction = "카드 할부 결제에 대해서 알려줘"

messages = [
    {
        "role": "system",
        "content": "당신은 훌륭한 AI 비서입니다. You are a great AI assistant.",
    },
    {
        "role": "user",
        "content": instruction,
    },
]

# Render the conversation to a plain string (not token ids) and append the
# generation prompt so the model answers as the assistant.
prompt_message = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Stop on either the tokenizer's EOS token or the "<|eot_id|>" token.
eos_token_id = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

outputs = llm.generate(
    prompt_message,
    SamplingParams(
        stop_token_ids=eos_token_id,
        temperature=0.6,
        top_p=0.8,
        max_tokens=4096,
    ),
)

# Print the text of the first candidate of each returned output.
for output in outputs:
    generated_text = output.outputs[0].text
    print(generated_text)

Result

카드 할부 결제는 결제할 금액을 일정 기간 동안 나눠서 갚는 방식으로, 카드사에 의해 대출된 금액을 갚는 것입니다. 카드 할부 결제는 일정한 기간 동안 상환할 수 있는 금액을 선택하여 결제할 수 있으며, 이 과정에서 이자를 지불해야 합니다. 카드 할부 결제는 일시불 결제보다 유리할 수 있지만, 이자를 지불해야 하기 때문에 비용이 증가합니다.

Downloads last month
3,264
Safetensors
Model size
8.17B params
Tensor type
BF16
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for sh2orc/Llama-3-Korean-8B

Finetunes
2 models