# NOTE(review): the three lines below are Hugging Face Hub page residue
# (avatar caption, commit message, commit hash), not configuration keys.
# Kept as comments so the file parses as YAML.
# jijihuny's picture
# Training in progress, step 413
# afd196e verified
---
# Fine-tuning configuration: completion-only QLoRA training of a Korean
# Llama-3 8B model on an economics QA dataset.
# NOTE(review): structural indentation was stripped in the scraped copy; the
# hierarchy below was reconstructed from key grouping and the alphabetical
# key order within each mapping — confirm against the consuming script.

# Training dataset source and split handling.
dataset:
  include_answer: true
  name: train
  path: jijihuny/economics_qa
  shuffle: true
  test_size: null

# Decoding parameters for text generation during evaluation/inference.
generation:
  do_sample: false
  dola_layers: null
  length_penalty: null
  max_new_tokens: 50
  num_beams: null
  penalty_alpha: null
  repetition_penalty: null
  return_full_text: false
  top_k: 1

# Evaluation metric source.
metric:
  only_inference: false
  path: jijihuny/ecqa

# Base model and loading options.
model:
  attn_implementation: sdpa
  device_map: auto
  path: MLP-KTLim/llama-3-Korean-Bllossom-8B
  # Decoded from the original \uXXXX escape sequences for readability; the
  # folded scalar yields the identical single-line string.
  # (English gist: "You are a chatbot that finds the Answer to the Question
  # in the given Context; copy the answering span verbatim, short-form only.")
  system_prompt: >-
    너는 주어진 Context에서 Question에 대한 Answer를 찾는 챗봇이야.
    Context에서 Answer가 될 수 있는 부분을 찾아서 그대로 적어줘. 단,
    Answer는 주관식이 아니라 단답형으로 적어야 해.
  task: text-generation
  torch_dtype: auto

seed: 42

# Trainer / SFT settings.
train:
  # HF Trainer / SFT arguments (consumed by the training script).
  args:
    bf16: true
    bf16_full_eval: true
    dataloader_num_workers: 12
    eval_accumulation_steps: 1
    eval_on_start: false
    eval_steps: 0.1  # float < 1 — fraction of total training steps
    eval_strategy: steps
    gradient_accumulation_steps: 1
    learning_rate: 0.0001
    logging_steps: 1
    lr_scheduler_kwargs:
      num_cycles: 5
    lr_scheduler_type: cosine
    max_grad_norm: 1.2
    max_seq_length: 2048
    num_train_epochs: 1
    optim: paged_adamw_8bit
    output_dir: llama3-qlora-completion-only
    per_device_eval_batch_size: 32
    per_device_train_batch_size: 16
    push_to_hub: true
    report_to: wandb
    run_name: llama3-qlora-completion-only
    save_steps: 0.2  # float < 1 — fraction of total training steps
    torch_compile: true
    torch_empty_cache_steps: 5
    warmup_ratio: 0.005
    weight_decay: 0.01
  # Llama-3 chat-template marker for the user turn; quoted so the leading
  # '<' can never be misread by a YAML parser.
  instruction_template: '<|start_header_id|>user<|end_header_id|>'
  # PEFT LoRA adapter configuration.
  lora:
    bias: none
    lora_alpha: 32
    lora_dropout: 0.05
    r: 16
    target_modules:
      - up_proj
      - down_proj
      - gate_proj
      - k_proj
      - q_proj
      - v_proj
      - o_proj
      - lm_head
    task_type: CAUSAL_LM
  # bitsandbytes 4-bit quantization (NF4 + double quantization) — the
  # QLoRA loading recipe for the base model.
  quantization:
    bnb_4bit_compute_dtype: bfloat16
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
    load_in_4bit: true
  # Llama-3 chat-template marker for the assistant turn (response prefix
  # used by the completion-only collator to mask the prompt from the loss).
  response_template: '<|start_header_id|>assistant<|end_header_id|>'
  use_completion_only_data_collator: true