KasaiDanto committed on
Commit
7c66f16
·
verified ·
1 Parent(s): c832738

End of training

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 1.7752
20
 
21
  ## Model description
22
 
@@ -36,26 +36,26 @@ More information needed
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 5e-05
39
- - train_batch_size: 1
40
- - eval_batch_size: 1
41
  - seed: 4856
42
  - gradient_accumulation_steps: 4
43
- - total_train_batch_size: 4
44
  - optimizer: Adam with betas=(0.9,0.995) and epsilon=1e-08
45
  - lr_scheduler_type: reduce_lr_on_plateau
46
  - num_epochs: 2
47
 
48
  ### Training results
49
 
50
- | Training Loss | Epoch | Step | Validation Loss |
51
- |:-------------:|:------:|:-----:|:---------------:|
52
- | 1.8951 | 1.0000 | 7824 | 1.8894 |
53
- | 1.7225 | 1.9999 | 15648 | 1.7752 |
54
 
55
 
56
  ### Framework versions
57
 
58
- - PEFT 0.13.2
59
  - Transformers 4.45.1
60
  - Pytorch 2.4.0
61
  - Datasets 3.0.1
 
16
 
17
  This model is a fine-tuned version of [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 1.8785
20
 
21
  ## Model description
22
 
 
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 5e-05
39
+ - train_batch_size: 2
40
+ - eval_batch_size: 2
41
  - seed: 4856
42
  - gradient_accumulation_steps: 4
43
+ - total_train_batch_size: 8
44
  - optimizer: Adam with betas=(0.9,0.995) and epsilon=1e-08
45
  - lr_scheduler_type: reduce_lr_on_plateau
46
  - num_epochs: 2
47
 
48
  ### Training results
49
 
50
+ | Training Loss | Epoch | Step | Validation Loss |
51
+ |:-------------:|:------:|:----:|:---------------:|
52
+ | 2.0279 | 0.9999 | 3912 | 1.9928 |
53
+ | 1.8365 | 1.9999 | 7824 | 1.8785 |
54
 
55
 
56
  ### Framework versions
57
 
58
+ - PEFT 0.14.0
59
  - Transformers 4.45.1
60
  - Pytorch 2.4.0
61
  - Datasets 3.0.1
adapter_config.json CHANGED
@@ -3,6 +3,8 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "google/gemma-2-2b-it",
5
  "bias": "none",
 
 
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
@@ -11,6 +13,7 @@
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
  "lora_alpha": 16,
 
14
  "lora_dropout": 0.1,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
@@ -20,13 +23,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
23
  "q_proj",
24
- "gate_proj",
25
  "v_proj",
26
- "k_proj",
27
  "down_proj",
28
- "o_proj",
29
- "up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "google/gemma-2-2b-it",
5
  "bias": "none",
6
+ "eva_config": null,
7
+ "exclude_modules": null,
8
  "fan_in_fan_out": false,
9
  "inference_mode": true,
10
  "init_lora_weights": true,
 
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
  "lora_alpha": 16,
16
+ "lora_bias": false,
17
  "lora_dropout": 0.1,
18
  "megatron_config": null,
19
  "megatron_core": "megatron.core",
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "o_proj",
27
+ "k_proj",
28
+ "up_proj",
29
  "q_proj",
 
30
  "v_proj",
 
31
  "down_proj",
32
+ "gate_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a370041cafe67597a41400a259b78c0ab58862ba4bac2dbe32295fcfcff0d2e
3
  size 41581360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81074065d77991800b5b0a08d6783de402a32d8780a3a451ade3cd8d6f55402b
3
  size 41581360
runs/Dec08_07-56-39_3c46bd1dcd85/events.out.tfevents.1733644600.3c46bd1dcd85.30.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e393b7dbaec7aa7222e0f166debfd7601b49eb6a6bbbc5c60ee036c27f0e89
3
+ size 9734
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3937b0bacb52dab66fdae21c3d354cb35fd3bf0b9bdaf7f337581ea386e2c697
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:405eba1f683dc1d69866e1c523b97c83fcbd8e4c693884b09e9278a7b5dcb6b5
3
  size 5240