lingchensanwen committed
Commit: 8c72d2d
1 Parent(s): 0564d0a

Update qlora.yml

Files changed (1)
  1. qlora.yml +4 -11
qlora.yml CHANGED
@@ -1,32 +1,29 @@
 base_model: mistralai/Mistral-7B-Instruct-v0.2
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# hub_model_id: mistral-ins-generation #the best saved in mistral-generation-new now
 
 load_in_8bit: false
 load_in_4bit: true
 strict: false
 
 datasets:
-  - path: /home/yw23374/axolotl/examples/mistral/data/final_data/upsampled_train.json
+  - path: ../upsampled_train.json
     ds_type: json
     type: alpaca
     split: train
 
 test_datasets:
-  - path: /home/yw23374/axolotl/examples/mistral/data/final_data/val.json
+  - path: ../val.json
     ds_type: json
     type: alpaca
     split: train
 
-# metrics:
-#   - {mae}
 
 load_best_model_at_end: False
 early_stopping_patience:
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./mistral-ins-upsample-4th
+output_dir: ./mistral-ins-upsample-1st
 
 adapter: qlora
 lora_model_dir:
@@ -57,7 +54,7 @@ wandb_log_model:
 
 gradient_accumulation_steps: 2
 micro_batch_size: 1
-num_epochs: 3 #3 before is the best
+num_epochs: 3
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0001
@@ -96,7 +93,3 @@ special_tokens:
 bos_token: "<s>"
 eos_token: "</s>"
 unk_token: "<unk>"
-
-# tokens: # these are delimiters
-#   - "<|im_start|>"
-#   - "<|im_end|>"
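
Both datasets entries keep ds_type: json with type: alpaca, so axolotl expects each record in upsampled_train.json and val.json to follow the Alpaca instruction schema. The sketch below shows the shape of one such record; the field values are purely illustrative and are not taken from the actual data.

# Illustrative only: the record shape expected by a `type: alpaca`, `ds_type: json` dataset.
# The values are made up; only the keys matter.
import json

record = {
    "instruction": "Summarize the following passage in one sentence.",
    "input": "QLoRA fine-tunes a 4-bit quantized base model by training small low-rank adapters.",
    "output": "QLoRA trains low-rank adapters on top of a frozen 4-bit base model.",
}
print(json.dumps(record, indent=2))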
 
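
After training, the QLoRA adapter weights are written to the output_dir set above. The following is a minimal sketch, not part of the commit, of loading that adapter for inference with the standard transformers/peft APIs; it assumes the adapter directory is ./mistral-ins-upsample-1st and mirrors the config's load_in_4bit: true.

# Sketch: load the base model in 4-bit and attach the QLoRA adapter.
# Assumes the adapter was saved to ./mistral-ins-upsample-1st (output_dir from qlora.yml).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_model = "mistralai/Mistral-7B-Instruct-v0.2"
adapter_dir = "./mistral-ins-upsample-1st"

# Mirror load_in_4bit: true from the training config
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_dir)  # attach the LoRA weights

prompt = "[INST] Summarize the training setup in one sentence. [/INST]"  # illustrative prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))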