---

license: apache-2.0
base_model:
- Qwen/Qwen2.5-7B
library_name: transformers
language:
- zho
- eng
- fra
- spa
- por
- deu
- ita
- rus
- jpn
- kor
- vie
- tha
- ara
---


```yaml
base_model: Qwen/Qwen2.5-7B
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
    type: sharegpt
    conversation: chatml
  - path: NewEden/Kalo-Opus-Instruct-22k-Refusal-Murdered
    type: sharegpt
    conversation: chatml
  - path: Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
    type: sharegpt
    conversation: chatml
  - path: NewEden/Gryphe-Sonnet-3.5-35k-Subset
    type: sharegpt
    conversation: chatml
  - path: Nitral-AI/Reasoning-1shot_ShareGPT
    type: sharegpt
    conversation: chatml
  - path: Nitral-AI/GU_Instruct-ShareGPT
    type: sharegpt
    conversation: chatml
  - path: Nitral-AI/Medical_Instruct-ShareGPT
    type: sharegpt
    conversation: chatml

chat_template: chatml

val_set_size: 0.01
output_dir: ./outputs/out

adapter:
lora_r:
lora_alpha:
lora_dropout:
lora_target_linear:

sequence_len: 8192
# sequence_len: 32768
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true

plugins:
  - axolotl.integrations.liger.LigerPlugin
liger_rope: true
liger_rms_norm: true
liger_swiglu: true
liger_fused_linear_cross_entropy: true

wandb_project: qwen7B
wandb_entity:
wandb_watch:
wandb_name: qwen7B
wandb_log_model:

gradient_accumulation_steps: 32
micro_batch_size: 1
num_epochs: 2
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.00001
weight_decay: 0.05

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: true

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_ratio: 0.1
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 2

debug:
deepspeed:
fsdp:
fsdp_config:

special_tokens:
  pad_token: <pad>
```
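
The config above trains the model on ChatML-formatted conversations (`chat_template: chatml`). As a minimal inference sketch with `transformers`, assuming the finetuned weights are published under a Hugging Face repository id (shown below as a placeholder, not the real name) and that the ChatML template is saved with the tokenizer:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder repo id -- substitute the actual model repository name.
model_id = "your-org/your-qwen2.5-7b-finetune"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # mirrors the bf16 training setting above
    device_map="auto",
)

# chat_template: chatml means apply_chat_template should emit
# <|im_start|>/<|im_end|> formatted turns.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize the Qwen2.5 series in one sentence."},
]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```

Swap in the real repository id before running; the placeholder is only there to make the sketch self-contained.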