FatCat87 committed
Commit c8d4c9f · verified · 1 Parent(s): bdb8d46

End of training

Files changed (2)
  1. README.md +29 -27
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -1,10 +1,11 @@
 ---
 library_name: peft
 tags:
+- axolotl
 - generated_from_trainer
 base_model: mhenrichsen/gemma-7b
 model-index:
-- name: outputs/out
+- name: test-task-2025-01-06
   results: []
 ---

@@ -36,6 +37,7 @@ fsdp_config: null
 gradient_accumulation_steps: 3
 gradient_checkpointing: true
 group_by_length: false
+hub_model_id: FatCat87/test-task-2025-01-06
 learning_rate: 0.0002
 load_in_4bit: true
 load_in_8bit: false
@@ -62,11 +64,12 @@ tf32: false
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 val_set_size: 0.1
-wandb_entity: null
+wandb_entity: fatcat87-taopanda
 wandb_log_model: null
-wandb_name: test-task
-wandb_project: null
-wandb_runid: test-task
+wandb_mode: online
+wandb_name: test-task-2025-01-06
+wandb_project: subnet56
+wandb_runid: test-task-2025-01-06
 wandb_watch: null
 warmup_ratio: 0.1
 weight_decay: 0.0
@@ -76,11 +79,12 @@ xformers_attention: null

 </details><br>

-# outputs/out
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/fatcat87-taopanda/subnet56/runs/p0rc3cvq)
+# test-task-2025-01-06

 This model is a fine-tuned version of [mhenrichsen/gemma-7b](https://huggingface.co/mhenrichsen/gemma-7b) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.0412
+- Loss: 1.0913

 ## Model description

@@ -103,33 +107,31 @@ The following hyperparameters were used during training:
 - train_batch_size: 2
 - eval_batch_size: 2
 - seed: 42
-- distributed_type: multi-GPU
-- num_devices: 2
 - gradient_accumulation_steps: 3
-- total_train_batch_size: 12
-- total_eval_batch_size: 4
+- total_train_batch_size: 6
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
-- lr_scheduler_warmup_steps: 2
+- lr_scheduler_warmup_steps: 5
 - num_epochs: 4

 ### Training results

-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 1.0695 | 0.1579 | 1 | 1.1912 |
-| 1.1142 | 0.3158 | 2 | 1.1076 |
-| 1.0936 | 0.6316 | 4 | 1.0834 |
-| 1.058 | 0.9474 | 6 | 1.0378 |
-| 0.9794 | 1.1579 | 8 | 1.0479 |
-| 0.9632 | 1.4737 | 10 | 1.0377 |
-| 0.951 | 1.7895 | 12 | 1.0467 |
-| 1.0219 | 2.1053 | 14 | 1.0463 |
-| 0.9345 | 2.3158 | 16 | 1.0417 |
-| 0.9314 | 2.6316 | 18 | 1.0434 |
-| 0.9108 | 2.9474 | 20 | 1.0363 |
-| 0.894 | 3.1579 | 22 | 1.0348 |
-| 0.8835 | 3.4737 | 24 | 1.0412 |
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.046 | 0.075 | 1 | 1.1912 |
+| 1.1095 | 0.3 | 4 | 1.1067 |
+| 1.0619 | 0.6 | 8 | 1.0441 |
+| 1.0547 | 0.9 | 12 | 1.0446 |
+| 0.931 | 1.15 | 16 | 1.0528 |
+| 0.8836 | 1.45 | 20 | 1.0399 |
+| 0.8958 | 1.75 | 24 | 1.0419 |
+| 0.9922 | 2.05 | 28 | 1.0361 |
+| 0.7736 | 2.3 | 32 | 1.0851 |
+| 0.7437 | 2.6 | 36 | 1.0840 |
+| 0.7552 | 2.9 | 40 | 1.0769 |
+| 0.6623 | 3.15 | 44 | 1.0870 |
+| 0.7173 | 3.45 | 48 | 1.0946 |
+| 0.7122 | 3.75 | 52 | 1.0913 |


 ### Framework versions
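A note on the hyperparameter hunk above: the new card drops `distributed_type: multi-GPU` and `num_devices: 2`, so this run used a single device, and the effective batch sizes shrink accordingly (`total_eval_batch_size` disappears from the list because it now equals `eval_batch_size`). The arithmetic, using the formula these auto-generated cards follow:

```
total_train_batch_size = train_batch_size × gradient_accumulation_steps × num_devices
old: 2 × 3 × 2 = 12        new: 2 × 3 × 1 = 6
```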
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab93f1af6b30528b09ddf4f5826c4a502dd62cb47134a77f138d4a97e8d27c9b
+oid sha256:c05c1b8dba8f8c380d8615f6e36c46a7b81f5153ea5ac58052c3feac0713a74e
 size 200157610
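The `adapter_model.bin` entry is a Git LFS pointer: only the sha256 oid changed, so the commit swaps in new LoRA weights of exactly the same byte size. As a minimal usage sketch (not part of the commit), the updated adapter could be applied to the base model roughly as follows; the repo ids come from `base_model` and `hub_model_id` above, while the compute dtype and the prompt are illustrative assumptions.

```python
# Sketch: load the gemma-7b base in 4-bit and apply this commit's LoRA adapter.
# Repo ids are taken from the config diff above; everything else is illustrative.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

base_id = "mhenrichsen/gemma-7b"              # base_model in the card
adapter_id = "FatCat87/test-task-2025-01-06"  # hub_model_id in the config

# Mirrors the training-time quantization (load_in_4bit: true);
# the bf16 compute dtype is an assumption, not read from the config.
bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_id)  # applies adapter_model.bin

inputs = tokenizer("Hello, world", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```

`PeftModel.from_pretrained` downloads the adapter weights from the hub repo and applies them on top of the frozen quantized base.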