jijihuny committed on
Commit
05ab353
·
verified ·
1 Parent(s): 4213d96

Training in progress, step 2891

Browse files
.ipynb_checkpoints/config-checkpoint.yaml CHANGED
@@ -73,17 +73,17 @@ train:
73
  # group_by_length: true
74
  max_seq_length: 2048
75
  eval_strategy: steps
76
- per_device_train_batch_size: 4
77
- per_device_eval_batch_size: 8
78
  gradient_accumulation_steps: 1
79
  eval_accumulation_steps: 1
80
 
81
  optim: paged_adamw_8bit
82
  bf16: true
83
  bf16_full_eval: true
84
- learning_rate: 0.00002
85
  weight_decay: 0.01
86
- num_train_epochs: 1
87
  warmup_ratio: 0.005
88
  max_grad_norm: 2.0
89
 
@@ -94,5 +94,5 @@ train:
94
 
95
  push_to_hub: true
96
 
97
- torch_compile: true
98
  seed: 42
 
73
  # group_by_length: true
74
  max_seq_length: 2048
75
  eval_strategy: steps
76
+ per_device_train_batch_size: 16
77
+ per_device_eval_batch_size: 32
78
  gradient_accumulation_steps: 1
79
  eval_accumulation_steps: 1
80
 
81
  optim: paged_adamw_8bit
82
  bf16: true
83
  bf16_full_eval: true
84
+ learning_rate: 0.0002
85
  weight_decay: 0.01
86
+ num_train_epochs: 3
87
  warmup_ratio: 0.005
88
  max_grad_norm: 2.0
89
 
 
94
 
95
  push_to_hub: true
96
 
97
+ # torch_compile: true
98
  seed: 42
adapter_config.json CHANGED
@@ -21,11 +21,11 @@
21
  "revision": null,
22
  "target_modules": [
23
  "q_proj",
24
- "k_proj",
25
- "gate_proj",
26
- "v_proj",
27
  "up_proj",
28
  "o_proj",
 
 
 
29
  "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
21
  "revision": null,
22
  "target_modules": [
23
  "q_proj",
 
 
 
24
  "up_proj",
25
  "o_proj",
26
+ "v_proj",
27
+ "k_proj",
28
+ "gate_proj",
29
  "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:429b256cd598574ebc695ce486072c5c703d6cc4315b96dd20b1ffbe8d8e04eb
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:206f133c1a55ddcc354c928242558bab36dd1ea63d300c8cd94f4c8b352a95ec
3
  size 167832240
config.yaml CHANGED
@@ -73,17 +73,17 @@ train:
73
  # group_by_length: true
74
  max_seq_length: 2048
75
  eval_strategy: steps
76
- per_device_train_batch_size: 4
77
- per_device_eval_batch_size: 8
78
  gradient_accumulation_steps: 1
79
  eval_accumulation_steps: 1
80
 
81
  optim: paged_adamw_8bit
82
  bf16: true
83
  bf16_full_eval: true
84
- learning_rate: 0.00002
85
  weight_decay: 0.01
86
- num_train_epochs: 1
87
  warmup_ratio: 0.005
88
  max_grad_norm: 2.0
89
 
@@ -94,5 +94,5 @@ train:
94
 
95
  push_to_hub: true
96
 
97
- torch_compile: true
98
  seed: 42
 
73
  # group_by_length: true
74
  max_seq_length: 2048
75
  eval_strategy: steps
76
+ per_device_train_batch_size: 16
77
+ per_device_eval_batch_size: 32
78
  gradient_accumulation_steps: 1
79
  eval_accumulation_steps: 1
80
 
81
  optim: paged_adamw_8bit
82
  bf16: true
83
  bf16_full_eval: true
84
+ learning_rate: 0.0002
85
  weight_decay: 0.01
86
+ num_train_epochs: 3
87
  warmup_ratio: 0.005
88
  max_grad_norm: 2.0
89
 
 
94
 
95
  push_to_hub: true
96
 
97
+ # torch_compile: true
98
  seed: 42
tokenizer_config.json CHANGED
@@ -2052,7 +2052,6 @@
2052
  "bos_token": "<|begin_of_text|>",
2053
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
- "device_map": "auto",
2056
  "eos_token": "<|eot_id|>",
2057
  "model_input_names": [
2058
  "input_ids",
 
2052
  "bos_token": "<|begin_of_text|>",
2053
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
 
2055
  "eos_token": "<|eot_id|>",
2056
  "model_input_names": [
2057
  "input_ids",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d14636df665ef9af837b7c0e54de13e25f66ec22c9404e067f74e44d629828fb
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4be0f582408e2d7d80578b82d0f5d327d6639e4fa47e1a721f251f43f343529
3
  size 5432