Model save

README.md CHANGED
@@ -1,12 +1,11 @@
 ---
-base_model:
+base_model: inflatebot/MN-12B-Mag-Mell-R1
 library_name: peft
-license: apache-2.0
 tags:
 - axolotl
 - generated_from_trainer
 model-index:
-- name: mn-inf-qlora
+- name: mn-inf-qlora-mm
 results: []
 ---

@@ -20,19 +19,20 @@ axolotl version: `0.4.1`
 ```yaml
 # Set up for use on 2x24gb cards
 # huggingface-cli login --token $hf_key && wandb login $wandb_key
-# python -m axolotl.cli.preprocess mn-
-# accelerate launch -m axolotl.cli.train mn-
-# python -m axolotl.cli.merge_lora
+# python -m axolotl.cli.preprocess mn-magmell-patch.yml
+# accelerate launch -m axolotl.cli.train mn-magmell-patch.yml
+# python -m axolotl.cli.merge_lora mn-magmell-patch.yml
 # huggingface-cli upload ToastyPigeon/ms-type1-adventure-s adventure-workspace/merged . --private

-
+
+base_model: inflatebot/MN-12B-Mag-Mell-R1
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer

 load_in_8bit: false
 load_in_4bit: true
 strict: false
-sequence_len:
+sequence_len: 16384 # 99% vram
 min_sample_len: 128
 bf16: true
 fp16:
@@ -45,11 +45,14 @@ dataset_prepared_path: last_run_prepared
 datasets:
   - path: botmall/bodinforg-completions
     type: completion
-warmup_steps:
+warmup_steps: 5
 shuffle_merged_datasets: true

 save_safetensors: true

+special_tokens:
+  pad_token: "<pad>"
+
 # WandB
 wandb_project: Mistral-Nemo-Inflation
 wandb_entity:
@@ -59,7 +62,7 @@ num_epochs: 1

 # Output
 output_dir: ./adventure-workspace
-hub_model_id: botmall/mn-inf-qlora
+hub_model_id: botmall/mn-inf-qlora-mm
 hub_strategy: "checkpoint"

 # Sampling
@@ -80,7 +83,7 @@ unsloth_cross_entropy_loss: true
 #unsloth_lora_o: true

 # Evaluation
-val_set_size:
+val_set_size: 20
 evals_per_epoch: 5
 eval_table_size:
 eval_max_new_tokens: 256
@@ -138,11 +141,11 @@ liger_fused_linear_cross_entropy: true

 </details><br>

-# mn-inf-qlora
+# mn-inf-qlora-mm

-This model is a fine-tuned version of [
+This model is a fine-tuned version of [inflatebot/MN-12B-Mag-Mell-R1](https://huggingface.co/inflatebot/MN-12B-Mag-Mell-R1) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.
+- Loss: 2.2760

 ## Model description

@@ -171,18 +174,19 @@ The following hyperparameters were used during training:
 - total_eval_batch_size: 2
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
-- lr_scheduler_warmup_steps:
+- lr_scheduler_warmup_steps: 5
 - num_epochs: 1

 ### Training results

 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 2.
-| 2.
-| 2.
-| 2.
-| 2.
+| 2.5697        | 0.0119 | 1    | 2.4926          |
+| 2.2991        | 0.2024 | 17   | 2.3356          |
+| 2.199         | 0.4048 | 34   | 2.2999          |
+| 2.3336        | 0.6071 | 51   | 2.2864          |
+| 2.1637        | 0.8095 | 68   | 2.2795          |
+| 2.2057        | 1.0119 | 85   | 2.2760          |


 ### Framework versions
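
A few usage notes on the configuration above. First, loading the result: a minimal sketch of running inference with this adapter, assuming the usual transformers + peft + bitsandbytes stack. The repo ids come from the card (`inflatebot/MN-12B-Mag-Mell-R1` as base, `botmall/mn-inf-qlora-mm` as the adapter); the quantization settings mirror `load_in_4bit: true` and `bf16: true` in the config and are otherwise assumptions, not anything this commit prescribes.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "inflatebot/MN-12B-Mag-Mell-R1"   # base_model in the config
adapter_id = "botmall/mn-inf-qlora-mm"      # hub_model_id in the config

# 4-bit load mirrors load_in_4bit: true; compute dtype mirrors bf16: true.
bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_id)

# merge_and_unload() bakes the LoRA into the base weights, roughly what the
# `python -m axolotl.cli.merge_lora` step produces before the hub upload
# (best done on an unquantized load of the base model):
# model = model.merge_and_unload()
```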
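Second, the new `special_tokens` block. In plain transformers terms it amounts to something like the sketch below; whether `<pad>` already exists in the Mistral-Nemo (Tekken) vocabulary or has to be added fresh is an assumption worth checking, hence the hedged resize step.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("inflatebot/MN-12B-Mag-Mell-R1")

# Register "<pad>" as the pad token, as the config's special_tokens block does.
# add_special_tokens returns how many genuinely new tokens were created.
added = tokenizer.add_special_tokens({"pad_token": "<pad>"})
print(tokenizer.pad_token, tokenizer.pad_token_id)

# Only needed if added > 0, i.e. "<pad>" was not already in the vocab:
# model.resize_token_embeddings(len(tokenizer))
```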
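Finally, the warmup change: `warmup_steps: 5` (surfaced as `lr_scheduler_warmup_steps: 5` in the hyperparameters) pairs with the cosine scheduler. Reading ~85 optimizer steps per epoch off the last row of the results table, which also explains the eval rows landing every 17 steps with `evals_per_epoch: 5`, the implied schedule can be sketched as below; the base learning rate is not visible in these hunks, so the value here is a placeholder.

```python
import torch
from transformers import get_cosine_schedule_with_warmup

# Placeholder parameter and lr; betas/eps match the card's optimizer line.
params = [torch.nn.Parameter(torch.zeros(1))]
opt = torch.optim.Adam(params, lr=1e-4, betas=(0.9, 0.999), eps=1e-8)

# Cosine decay with 5 warmup steps over ~85 total steps (from the table).
sched = get_cosine_schedule_with_warmup(opt, num_warmup_steps=5, num_training_steps=85)

for step in range(1, 86):
    opt.step()
    sched.step()
    if step in (1, 5, 17, 85):  # end of warmup and the eval checkpoints
        print(f"step {step}: lr = {sched.get_last_lr()[0]:.2e}")
```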