|
|
|
|
|
In [ ]: |
|
!pip3 install -q -U transformers==4.38.2 |
|
!pip3 install -q -U datasets==2.18.0 |
|
!pip3 install -q -U bitsandbytes==0.42.0 |
|
!pip3 install -q -U peft==0.9.0 |
|
!pip3 install -q -U trl==0.7.11 |
|
!pip3 install -q -U accelerate==0.27.2 |
|
|
|
|
|
In [ ]: |
|
import torch |
|
from datasets import Dataset, load_dataset |
|
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, TrainingArguments |
|
from peft import LoraConfig, PeftModel |
|
from trl import SFTTrainer |
|
|
|
|
|
In [ ]: |
|
from huggingface_hub import notebook_login |
|
notebook_login() |
|
|
|
|
|
|
|
In [ ]: |
|
from datasets import load_dataset |
|
dataset = load_dataset("daekeun-ml/naver-news-summarization-ko") |
|
|
|
In [ ]: |
|
dataset |
|
|
|
In [ ]: |
|
dataset['train'][0] |
|
|
|
|
|
|
|
In [ ]: |
|
BASE_MODEL = "google/gemma-2b-it" |
|
|
|
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map={"":0}) |
|
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, add_special_tokens=True) |
|
|
|
In [ ]: |
|
doc = dataset['train']['document'][0] |
|
In [ ]: |
|
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512) |
|
In [ ]: |
|
messages = [ |
|
{ |
|
"role": "user", |
|
"content": "๋ค์ ๊ธ์ ์์ฝํด์ฃผ์ธ์ :\n\n{}".format(doc) |
|
} |
|
] |
|
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) |
|
In [ ]: |
|
prompt |
|
|
|
In [ ]: |
|
outputs = pipe( |
|
prompt, |
|
do_sample=True, |
|
temperature=0.2, |
|
top_k=50, |
|
top_p=0.95, |
|
add_special_tokens=True |
|
) |
|
In [ ]: |
|
print(outputs[0]["generated_text"][len(prompt):]) |
|
|
|
|
|
Note: Because of Colab's GPU memory limit, the memory used for inference in the previous section must be freed before fine-tuning can proceed.
|
After restarting the notebook runtime session, re-run sections 1 and 2 up to item 2.1 to reload the environment, then continue with the steps below.
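If you would rather not restart the runtime, it is sometimes enough to delete the objects from the previous inference step and clear the CUDA cache. This is only a minimal sketch, assuming the `pipe`, `model`, and `tokenizer` objects from the cells above are still defined; a full runtime restart remains the more reliable option.

import gc
import torch

# Drop the references created for the base-model inference above (assumed names).
del pipe, model, tokenizer

gc.collect()              # reclaim Python-side objects
torch.cuda.empty_cache()  # return cached CUDA memory to the driver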
|
In [ ]: |
|
!nvidia-smi |
|
|
|
In [ ]: |
|
def generate_prompt(example): |
|
prompt_list = [] |
|
for i in range(len(example['document'])): |
|
prompt_list.append(r"""<bos><start_of_turn>user |
|
다음 글을 요약해주세요:
|
|
|
{}<end_of_turn> |
|
<start_of_turn>model |
|
{}<end_of_turn><eos>""".format(example['document'][i], example['summary'][i])) |
|
return prompt_list |
|
In [ ]: |
|
train_data = dataset['train'] |
|
print(generate_prompt(train_data[:1])[0]) |
|
|
|
In [ ]: |
|
lora_config = LoraConfig( |
|
r=6, |
|
target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"], |
|
task_type="CAUSAL_LM", |
|
) |
|
|
|
bnb_config = BitsAndBytesConfig( |
|
load_in_4bit=True, |
|
bnb_4bit_quant_type="nf4", |
|
bnb_4bit_compute_dtype=torch.float16 |
|
) |
|
In [ ]: |
|
BASE_MODEL = "google/gemma-2b-it" |
|
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto", quantization_config=bnb_config) |
|
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, add_special_tokens=True) |
|
tokenizer.padding_side = 'right' |
|
|
|
In [ ]: |
|
trainer = SFTTrainer( |
|
model=model, |
|
train_dataset=train_data, |
|
max_seq_length=512, |
|
args=TrainingArguments( |
|
output_dir="outputs", |
|
|
|
max_steps=3000, |
|
per_device_train_batch_size=1, |
|
gradient_accumulation_steps=4, |
|
optim="paged_adamw_8bit", |
|
warmup_ratio=0.03,
|
learning_rate=2e-4, |
|
fp16=True, |
|
logging_steps=100, |
|
push_to_hub=False, |
|
report_to='none', |
|
), |
|
peft_config=lora_config, |
|
formatting_func=generate_prompt, |
|
) |
|
In [ ]: |
|
trainer.train() |
|
|
|
In [ ]: |
|
ADAPTER_MODEL = "lora_adapter" |
|
|
|
trainer.model.save_pretrained(ADAPTER_MODEL) |
|
In [ ]: |
|
!ls -alh lora_adapter |
|
In [ ]: |
|
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map='auto', torch_dtype=torch.float16) |
|
model = PeftModel.from_pretrained(model, ADAPTER_MODEL, device_map='auto', torch_dtype=torch.float16) |
|
|
|
model = model.merge_and_unload() |
|
model.save_pretrained('gemma-2b-it-sum-ko') |
|
In [ ]: |
|
!ls -alh ./gemma-2b-it-sum-ko |
|
|
|
|
|
Note: Likewise, because of Colab's GPU memory limit, the memory used during training must be freed before running inference with the fine-tuned model.
|
After restarting the notebook runtime session, re-run sections 1 and 2 up to item 2.1 to reload the environment, then continue with the steps below.
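As before, instead of restarting you can try releasing the training objects and clearing the CUDA cache before loading the merged model. A minimal sketch, assuming the `trainer` and `model` objects from the cells above are still defined:

import gc
import torch

# Drop the training objects before loading the merged model for inference.
del trainer, model

gc.collect()
torch.cuda.empty_cache()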
|
In [ ]: |
|
!nvidia-smi |
|
|
|
In [ ]: |
|
BASE_MODEL = "google/gemma-2b-it" |
|
FINETUNE_MODEL = "./gemma-2b-it-sum-ko" |
|
|
|
finetune_model = AutoModelForCausalLM.from_pretrained(FINETUNE_MODEL, device_map={"":0}) |
|
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, add_special_tokens=True) |
|
|
|
In [ ]: |
|
pipe_finetuned = pipeline("text-generation", model=finetune_model, tokenizer=tokenizer, max_new_tokens=512) |
|
In [ ]: |
|
doc = dataset['test']['document'][10] |
|
In [ ]: |
|
messages = [ |
|
{ |
|
"role": "user", |
|
"content": "๋ค์ ๊ธ์ ์์ฝํด์ฃผ์ธ์:\n\n{}".format(doc) |
|
} |
|
] |
|
prompt = pipe_finetuned.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) |
|
In [ ]: |
|
outputs = pipe_finetuned( |
|
prompt, |
|
do_sample=True, |
|
temperature=0.2, |
|
top_k=50, |
|
top_p=0.95, |
|
add_special_tokens=True |
|
) |
|
print(outputs[0]["generated_text"][len(prompt):]) |
|
In [ ]: |
|
|