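# QLoRA fine-tuning configuration: MLP-KTLim/llama-3-Korean-Bllossom-8B on the jijihuny/economics_qa dataset.

# Training data loaded from the Hugging Face Hub; shuffled, no test split configured (test_size: null).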
dataset:
  include_answer: true
  name: train
  path: jijihuny/economics_qa
  shuffle: true
  test_size: null
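
# Decoding settings for inference/evaluation: greedy decoding (do_sample false, top_k 1), at most 50 new
# tokens, returning only the generated completion; null values fall back to library defaults.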
generation:
  do_sample: false
  dola_layers: null
  length_penalty: null
  max_new_tokens: 50
  num_beams: null
  penalty_alpha: null
  repetition_penalty: null
  return_full_text: false
  top_k: 1
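
# Evaluation metric configuration; the metric implementation is loaded from the jijihuny/ecqa Hub path.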
metric:
  only_inference: false
  path: jijihuny/ecqa
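
# Base model and runtime settings: SDPA attention, automatic device placement, dtype chosen automatically.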
model:
  attn_implementation: sdpa
  device_map: auto
  path: MLP-KTLim/llama-3-Korean-Bllossom-8B
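  # System prompt (Korean). English: "You are a chatbot that finds the Answer to the Question in the given
  # Context. Find the part of the Context that can serve as the Answer and copy it out verbatim. The Answer
  # must be written as a short answer, not an open-ended one."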
system_prompt: "\uB108\uB294 \uC8FC\uC5B4\uC9C4 Context\uC5D0\uC11C Question\uC5D0\ |
|
\ \uB300\uD55C Answer\uB97C \uCC3E\uB294 \uCC57\uBD07\uC774\uC57C. Context\uC5D0\ |
|
\uC11C Answer\uAC00 \uB420 \uC218 \uC788\uB294 \uBD80\uBD84\uC744 \uCC3E\uC544\ |
|
\uC11C \uADF8\uB300\uB85C \uC801\uC5B4\uC918. \uB2E8, Answer\uB294 \uC8FC\uAD00\ |
|
\uC2DD\uC774 \uC544\uB2C8\uB77C \uB2E8\uB2F5\uD615\uC73C\uB85C \uC801\uC5B4\uC57C\ |
|
\ \uD574." |
|
task: text-generation |
|
torch_dtype: auto |
|
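
# Global random seed for reproducibility.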
seed: 42
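
# Supervised fine-tuning setup (QLoRA): Trainer arguments, completion-only data collation, LoRA adapter, and
# 4-bit quantization.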
train:
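  # Hugging Face TrainingArguments / TRL SFTConfig-style fields; fractional eval_steps and save_steps are
  # interpreted as ratios of the total number of training steps.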
  args:
    bf16: true
    bf16_full_eval: true
    dataloader_num_workers: 12
    eval_accumulation_steps: 1
    eval_on_start: false
    eval_steps: 0.1
    eval_strategy: steps
    gradient_accumulation_steps: 1
    learning_rate: 0.0001
    logging_steps: 1
    lr_scheduler_kwargs:
      num_cycles: 5
    lr_scheduler_type: cosine
    max_grad_norm: 1.2
    max_seq_length: 2048
    num_train_epochs: 1
    optim: paged_adamw_8bit
    output_dir: llama3-qlora-completion-only
    per_device_eval_batch_size: 32
    per_device_train_batch_size: 16
    push_to_hub: true
    report_to: wandb
    run_name: llama3-qlora-completion-only
    save_steps: 0.2
    torch_compile: true
    torch_empty_cache_steps: 5
    warmup_ratio: 0.005
    weight_decay: 0.01
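
  # Chat-template marker for the start of the user turn; together with response_template and
  # use_completion_only_data_collator below, the collator masks prompt tokens so the loss is computed only on
  # assistant completions (completion-only training, assumed from the key names).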
  instruction_template: <|start_header_id|>user<|end_header_id|>
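
  # PEFT LoRA settings: rank-16, alpha-32 adapters on every attention and MLP projection plus lm_head.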
  lora:
    bias: none
    lora_alpha: 32
    lora_dropout: 0.05
    r: 16
    target_modules:
    - up_proj
    - down_proj
    - gate_proj
    - k_proj
    - q_proj
    - v_proj
    - o_proj
    - lm_head
    task_type: CAUSAL_LM
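
  # bitsandbytes 4-bit NF4 quantization with double quantization and bfloat16 compute (QLoRA).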
  quantization:
    bnb_4bit_compute_dtype: bfloat16
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
    load_in_4bit: true
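
  # Chat-template marker for the start of the assistant turn; tokens after it are the ones trained on when
  # completion-only collation is enabled.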
  response_template: <|start_header_id|>assistant<|end_header_id|>
  use_completion_only_data_collator: true