lingchensanwen committed 8c72d2d (1 parent: 0564d0a)

Update qlora.yml

qlora.yml CHANGED
@@ -1,32 +1,29 @@
 base_model: mistralai/Mistral-7B-Instruct-v0.2
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# hub_model_id: mistral-ins-generation #the best saved in mistral-generation-new now
 
 load_in_8bit: false
 load_in_4bit: true
 strict: false
 
 datasets:
-  - path:
+  - path: ../upsampled_train.json
     ds_type: json
     type: alpaca
     split: train
 
 test_datasets:
-  - path:
+  - path: ../val.json
     ds_type: json
     type: alpaca
     split: train
 
-# metrics:
-#   - {mae}
 
 load_best_model_at_end: False
 early_stopping_patience:
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./mistral-ins-upsample-
+output_dir: ./mistral-ins-upsample-1st
 
 adapter: qlora
 lora_model_dir:
@@ -57,7 +54,7 @@ wandb_log_model:
 
 gradient_accumulation_steps: 2
 micro_batch_size: 1
-num_epochs: 3
+num_epochs: 3
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0001
@@ -96,7 +93,3 @@ special_tokens:
 bos_token: "<s>"
 eos_token: "</s>"
 unk_token: "<unk>"
-
-# tokens: # these are delimiters
-# - "<|im_start|>"
-# - "<|im_end|>"
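For reference, the combination of ds_type: json and type: alpaca tells axolotl to read a JSON file of alpaca-style instruction records. A minimal sketch of what a row in a file like ../upsampled_train.json would need to look like; the field values here are invented placeholders, and only the instruction/input/output keys are part of the alpaca format:

[
  {
    "instruction": "Summarize the passage in one sentence.",
    "input": "QLoRA fine-tunes a 4-bit quantized base model by training low-rank adapters.",
    "output": "QLoRA trains small LoRA adapters on top of a frozen 4-bit model."
  }
]

Because test_datasets is set, val_set_size stays at 0, so no holdout is carved out of the training file and evaluation comes only from ../val.json. A config like this is typically launched through axolotl, e.g. accelerate launch -m axolotl.cli.train qlora.yml, though the exact invocation depends on the installed axolotl version.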