Kareem Amr
committed on
End of training
- README.md +27 -25
- adapter_model.bin +2 -2
README.md CHANGED
````diff
@@ -2,10 +2,11 @@
 license: apache-2.0
 library_name: peft
 tags:
+- axolotl
 - generated_from_trainer
 base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
 model-index:
-- name:
+- name: tinyllama-1.1B_dolly-4.5k_lora
 results: []
 ---
 
@@ -39,21 +40,23 @@ pad_to_sequence_len: true
 
 adapter: lora
 lora_model_dir:
-lora_r:
+lora_r: 16
 lora_alpha: 16
-lora_dropout: 0.
+lora_dropout: 0.5
 lora_target_linear: true
 lora_fan_in_fan_out:
 
-wandb_project: tinyllama-dolly-axolotl
-wandb_entity: kamr54
+# wandb_project: tinyllama-dolly-axolotl
+# wandb_entity: kamr54
+
+hub_model_id: kareemamrr/tinyllama-1.1B_dolly-4.5k_lora
 
 gradient_accumulation_steps: 4
 micro_batch_size: 2
 num_epochs: 4
 optimizer: adamw_bnb_8bit
 lr_scheduler:
-learning_rate: 0.
+learning_rate: 0.0004
 
 train_on_inputs: false
 group_by_length: false
@@ -78,16 +81,15 @@ weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
-
 ```
 
 </details><br>
 
-#
+# tinyllama-1.1B_dolly-4.5k_lora
 
 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.
+- Loss: 1.7650
 
 ## Model description
 
@@ -106,7 +108,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.
+- learning_rate: 0.0004
 - train_batch_size: 2
 - eval_batch_size: 2
 - seed: 42
@@ -122,21 +124,21 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
 | 1.8146 | 0.0317 | 1 | 2.1074 |
-| 1.
-|
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
-| 1.
+| 1.7728 | 0.2540 | 8 | 1.8290 |
+| 1.9975 | 0.5079 | 16 | 1.7875 |
+| 1.7685 | 0.7619 | 24 | 1.7717 |
+| 1.8368 | 1.0159 | 32 | 1.7684 |
+| 1.768 | 1.2460 | 40 | 1.7622 |
+| 1.7774 | 1.5 | 48 | 1.7655 |
+| 1.7727 | 1.7540 | 56 | 1.7565 |
+| 1.7453 | 2.0079 | 64 | 1.7502 |
+| 1.5904 | 2.2381 | 72 | 1.7644 |
+| 1.5978 | 2.4921 | 80 | 1.7628 |
+| 1.7305 | 2.7460 | 88 | 1.7600 |
+| 1.4956 | 3.0 | 96 | 1.7582 |
+| 1.503 | 3.2222 | 104 | 1.7603 |
+| 1.6659 | 3.4762 | 112 | 1.7634 |
+| 1.734 | 3.7302 | 120 | 1.7650 |
 
 
 ### Framework versions
````
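For the record, the config above trains a rank-16 LoRA (alpha 16, dropout 0.5) over all linear layers, optimized with 8-bit AdamW at a 4e-4 learning rate and an effective batch size of micro_batch_size × gradient_accumulation_steps = 2 × 4 = 8. Below is a minimal `peft` sketch of roughly the adapter this builds; the explicit `target_modules` list is an assumption spelling out what `lora_target_linear: true` lets Axolotl discover automatically on a Llama-family model:

```python
from peft import LoraConfig

# Approximate equivalent of the adapter block in the Axolotl config above.
lora_config = LoraConfig(
    r=16,              # lora_r
    lora_alpha=16,     # lora_alpha
    lora_dropout=0.5,  # lora_dropout
    bias="none",
    task_type="CAUSAL_LM",
    # Assumed expansion of `lora_target_linear: true` for Llama-style blocks;
    # Axolotl locates these linear projections itself.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)
```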
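The other functional change is `hub_model_id`, which has Axolotl push the trained adapter to the Hub at the end of the run. A quick smoke-test sketch for pulling it back down, assuming the adapter repo records its base model in `adapter_config.json` the way `peft` normally writes it (the prompt template is illustrative, not necessarily the exact Dolly format used in training):

```python
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Loads the base model plus the adapter in one call; peft reads the
# base-model id out of the adapter repo's adapter_config.json.
model = AutoPeftModelForCausalLM.from_pretrained(
    "kareemamrr/tinyllama-1.1B_dolly-4.5k_lora",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
)

# Illustrative instruction-style prompt (assumed, not the verified template).
prompt = "### Instruction:\nName three uses for a paperclip.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```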
adapter_model.bin CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:be606f7dabf57a40b0d23e7b10b9ef2863b08dd5b69d56cb78ae4637056551c2
+size 50573530
```
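The weight change itself is just an LFS pointer update: under the git-lfs v1 pointer spec, `oid sha256:<hex>` is the SHA-256 digest of the actual file and `size` is its byte count. A minimal sketch for verifying a downloaded copy against the new pointer, assuming the file sits in the current directory:

```python
import hashlib
from pathlib import Path

# Compare a local download of adapter_model.bin against the LFS pointer above.
data = Path("adapter_model.bin").read_bytes()  # assumed local path
assert len(data) == 50573530, "size mismatch with the LFS pointer"
expected = "be606f7dabf57a40b0d23e7b10b9ef2863b08dd5b69d56cb78ae4637056551c2"
assert hashlib.sha256(data).hexdigest() == expected, "oid mismatch"
print("adapter_model.bin matches its LFS pointer")
```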