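# QLoRA fine-tuning configuration: MLP-KTLim/llama-3-Korean-Bllossom-8B trained with a
# completion-only SFT collator on the jijihuny/economics_qa short-answer QA dataset.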
dataset:
  include_answer: true
  name: train
  path: jijihuny/economics_qa
  shuffle: true
  test_size: null
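# Deterministic (greedy) generation for evaluation: sampling off, top_k 1, up to 50 new tokens.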
generation:
  do_sample: false
  dola_layers: null
  length_penalty: null
  max_new_tokens: 50
  num_beams: null
  penalty_alpha: null
  repetition_penalty: null
  return_full_text: false
  top_k: 1
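# Metric settings; only_inference: false runs scoring in addition to generation
# (assumption: jijihuny/ecqa resolves to an evaluation script on the Hugging Face Hub).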
metric:
  only_inference: false
  path: jijihuny/ecqa
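# Base model and text-generation pipeline settings: SDPA attention, automatic device
# placement, and dtype inferred from the checkpoint.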
model:
  attn_implementation: sdpa
  device_map: auto
  path: MLP-KTLim/llama-3-Korean-Bllossom-8B
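  # System prompt (English): "You are a chatbot that finds the Answer to the Question
  # in the given Context. Find the part of the Context that can serve as the Answer
  # and copy it verbatim. The Answer must be a short answer, not a free-form response."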
  system_prompt: "\uB108\uB294 \uC8FC\uC5B4\uC9C4 Context\uC5D0\uC11C Question\uC5D0\
    \ \uB300\uD55C Answer\uB97C \uCC3E\uB294 \uCC57\uBD07\uC774\uC57C. Context\uC5D0\
    \uC11C Answer\uAC00 \uB420 \uC218 \uC788\uB294 \uBD80\uBD84\uC744 \uCC3E\uC544\
    \uC11C \uADF8\uB300\uB85C \uC801\uC5B4\uC918. \uB2E8, Answer\uB294 \uC8FC\uAD00\
    \uC2DD\uC774 \uC544\uB2C8\uB77C \uB2E8\uB2F5\uD615\uC73C\uB85C \uC801\uC5B4\uC57C\
    \ \uD574."
  task: text-generation
  torch_dtype: auto
seed: 42
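# Fine-tuning setup: Trainer-style arguments plus LoRA and 4-bit quantization (QLoRA).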
train:
  args:
    bf16: true
    bf16_full_eval: true
    dataloader_num_workers: 12
    eval_accumulation_steps: 1
    eval_on_start: false
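    # Fractional eval_steps / save_steps are interpreted as a ratio of total training steps.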
    eval_steps: 0.1
    eval_strategy: steps
    gradient_accumulation_steps: 1
    learning_rate: 0.0001
    logging_steps: 1
    lr_scheduler_kwargs:
      num_cycles: 5
    lr_scheduler_type: cosine
    max_grad_norm: 1.2
    max_seq_length: 2048
    num_train_epochs: 1
    optim: paged_adamw_8bit
    output_dir: llama3-qlora-completion-only
    per_device_eval_batch_size: 32
    per_device_train_batch_size: 16
    push_to_hub: true
    report_to: wandb
    run_name: llama3-qlora-completion-only
    save_steps: 0.2
    torch_compile: true
    torch_empty_cache_steps: 5
    warmup_ratio: 0.005
    weight_decay: 0.01
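  # Llama-3 chat header markers delimiting the user prompt and the assistant completion
  # for the completion-only data collator (see response_template below).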
  instruction_template: <|start_header_id|>user<|end_header_id|>
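  # LoRA adapter: rank 16, alpha 32, applied to the attention and MLP projections plus lm_head.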
  lora:
    bias: none
    lora_alpha: 32
    lora_dropout: 0.05
    r: 16
    target_modules:
    - up_proj
    - down_proj
    - gate_proj
    - k_proj
    - q_proj
    - v_proj
    - o_proj
    - lm_head
    task_type: CAUSAL_LM
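  # bitsandbytes 4-bit NF4 quantization with double quantization; compute dtype bfloat16.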
  quantization:
    bnb_4bit_compute_dtype: bfloat16
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
    load_in_4bit: true
  response_template: <|start_header_id|>assistant<|end_header_id|>
  use_completion_only_data_collator: true
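
# A minimal sketch (an assumption about the consuming training script, not part of this
# config) of how the two template strings above are typically wired up when
# use_completion_only_data_collator is true, using TRL's DataCollatorForCompletionOnlyLM
# so that loss is computed only on the assistant completion:
#
#   from trl import DataCollatorForCompletionOnlyLM
#   collator = DataCollatorForCompletionOnlyLM(
#       instruction_template="<|start_header_id|>user<|end_header_id|>",
#       response_template="<|start_header_id|>assistant<|end_header_id|>",
#       tokenizer=tokenizer,  # tokenizer loaded from model.path
#   )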