lingchensanwen committed
Commit: 8c72d2d
1 Parent(s): 0564d0a

Update qlora.yml

Files changed (1)
  1. qlora.yml +4 -11
qlora.yml CHANGED
@@ -1,32 +1,29 @@
 base_model: mistralai/Mistral-7B-Instruct-v0.2
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# hub_model_id: mistral-ins-generation #the best saved in mistral-generation-new now
 
 load_in_8bit: false
 load_in_4bit: true
 strict: false
 
 datasets:
-  - path: /home/yw23374/axolotl/examples/mistral/data/final_data/upsampled_train.json
+  - path: ../upsampled_train.json
     ds_type: json
     type: alpaca
     split: train
 
 test_datasets:
-  - path: /home/yw23374/axolotl/examples/mistral/data/final_data/val.json
+  - path: ../val.json
     ds_type: json
     type: alpaca
     split: train
 
-# metrics:
-#   - {mae}
 
 load_best_model_at_end: False
 early_stopping_patience:
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./mistral-ins-upsample-4th
+output_dir: ./mistral-ins-upsample-1st
 
 adapter: qlora
 lora_model_dir:
@@ -57,7 +54,7 @@ wandb_log_model:
 
 gradient_accumulation_steps: 2
 micro_batch_size: 1
-num_epochs: 3 #3 before is the best
+num_epochs: 3
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0001
@@ -96,7 +93,3 @@ special_tokens:
 bos_token: "<s>"
 eos_token: "</s>"
 unk_token: "<unk>"
-
-# tokens: # these are delimiters
-#   - "<|im_start|>"
-#   - "<|im_end|>"
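
Both datasets entries keep ds_type: json with type: alpaca, so axolotl expects each record in upsampled_train.json and val.json to follow the Alpaca instruction schema. The sketch below shows the shape of one such record; the field values are purely illustrative and are not taken from the actual data.

# Illustrative only: the record shape expected by a `type: alpaca`, `ds_type: json` dataset.
# The values are made up; only the keys matter.
import json

record = {
    "instruction": "Summarize the following passage in one sentence.",
    "input": "QLoRA fine-tunes a 4-bit quantized base model by training small low-rank adapters.",
    "output": "QLoRA trains low-rank adapters on top of a frozen 4-bit base model.",
}
print(json.dumps(record, indent=2))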
 
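
After training, the QLoRA adapter weights are written to the output_dir set above. The following is a minimal sketch, not part of the commit, of loading that adapter for inference with the standard transformers/peft APIs; it assumes the adapter directory is ./mistral-ins-upsample-1st and mirrors the config's load_in_4bit: true.

# Sketch: load the base model in 4-bit and attach the QLoRA adapter.
# Assumes the adapter was saved to ./mistral-ins-upsample-1st (output_dir from qlora.yml).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_model = "mistralai/Mistral-7B-Instruct-v0.2"
adapter_dir = "./mistral-ins-upsample-1st"

# Mirror load_in_4bit: true from the training config
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_dir)  # attach the LoRA weights

prompt = "[INST] Summarize the training setup in one sentence. [/INST]"  # illustrative prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))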