Alignment-Lab-AI committed on
Commit
9d1927c
·
1 Parent(s): b0adfa1

sophiamini

Browse files
README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - load_in_8bit: False
9
+ - load_in_4bit: True
10
+ - llm_int8_threshold: 6.0
11
+ - llm_int8_skip_modules: None
12
+ - llm_int8_enable_fp32_cpu_offload: False
13
+ - llm_int8_has_fp16_weight: False
14
+ - bnb_4bit_quant_type: nf4
15
+ - bnb_4bit_use_double_quant: True
16
+ - bnb_4bit_compute_dtype: float16
17
+ ### Framework versions
18
+
19
+
20
+ - PEFT 0.4.0
adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "openlm-research/open_llama_3b",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9173400afd5962cc0cd954e4b94b9aa8fb7f58632b5b865776bebea2ee70a2c8
3
+ size 10685229
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.74,
3
+ "train_loss": 1.9701327433188756,
4
+ "train_runtime": 86.3764,
5
+ "train_samples_per_second": 38.691,
6
+ "train_steps_per_second": 0.278
7
+ }
finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "q_proj",
8
+ "v_proj"
9
+ ],
10
+ "name_module_trainable": "mlp",
11
+ "num_hidden_layers": 32,
12
+ "num_layer_trainable": 3
13
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.74,
3
+ "train_loss": 1.9701327433188756,
4
+ "train_runtime": 86.3764,
5
+ "train_samples_per_second": 38.691,
6
+ "train_steps_per_second": 0.278
7
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 24, "loss": 2.06, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9786121534345265e-05, "epoch": 0.11, "percentage": 4.17, "elapsed_time": "0:00:03", "remaining_time": "0:01:26"}
2
+ {"current_steps": 2, "total_steps": 24, "loss": 2.0206, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.914814565722671e-05, "epoch": 0.23, "percentage": 8.33, "elapsed_time": "0:00:07", "remaining_time": "0:01:21"}
3
+ {"current_steps": 3, "total_steps": 24, "loss": 2.0372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8096988312782174e-05, "epoch": 0.34, "percentage": 12.5, "elapsed_time": "0:00:10", "remaining_time": "0:01:16"}
4
+ {"current_steps": 4, "total_steps": 24, "loss": 1.9041, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.665063509461097e-05, "epoch": 0.46, "percentage": 16.67, "elapsed_time": "0:00:14", "remaining_time": "0:01:11"}
5
+ {"current_steps": 5, "total_steps": 24, "loss": 2.0016, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4833833507280884e-05, "epoch": 0.57, "percentage": 20.83, "elapsed_time": "0:00:18", "remaining_time": "0:01:08"}
6
+ {"current_steps": 6, "total_steps": 24, "loss": 1.9804, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.267766952966369e-05, "epoch": 0.69, "percentage": 25.0, "elapsed_time": "0:00:21", "remaining_time": "0:01:04"}
7
+ {"current_steps": 7, "total_steps": 24, "loss": 2.054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.021903572521802e-05, "epoch": 0.8, "percentage": 29.17, "elapsed_time": "0:00:25", "remaining_time": "0:01:01"}
8
+ {"current_steps": 8, "total_steps": 24, "loss": 1.9366, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7500000000000003e-05, "epoch": 0.91, "percentage": 33.33, "elapsed_time": "0:00:28", "remaining_time": "0:00:57"}
9
+ {"current_steps": 9, "total_steps": 24, "loss": 1.9305, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.456708580912725e-05, "epoch": 1.03, "percentage": 37.5, "elapsed_time": "0:00:32", "remaining_time": "0:00:54"}
10
+ {"current_steps": 10, "total_steps": 24, "loss": 1.9426, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.147047612756302e-05, "epoch": 1.14, "percentage": 41.67, "elapsed_time": "0:00:35", "remaining_time": "0:00:50"}
11
+ {"current_steps": 11, "total_steps": 24, "loss": 1.968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8263154805501297e-05, "epoch": 1.26, "percentage": 45.83, "elapsed_time": "0:00:39", "remaining_time": "0:00:46"}
12
+ {"current_steps": 12, "total_steps": 24, "loss": 2.0241, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5e-05, "epoch": 1.37, "percentage": 50.0, "elapsed_time": "0:00:43", "remaining_time": "0:00:43"}
13
+ {"current_steps": 13, "total_steps": 24, "loss": 1.9506, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.173684519449872e-05, "epoch": 1.49, "percentage": 54.17, "elapsed_time": "0:00:47", "remaining_time": "0:00:39"}
14
+ {"current_steps": 14, "total_steps": 24, "loss": 2.0218, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.852952387243698e-05, "epoch": 1.6, "percentage": 58.33, "elapsed_time": "0:00:50", "remaining_time": "0:00:36"}
15
+ {"current_steps": 15, "total_steps": 24, "loss": 1.896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5432914190872757e-05, "epoch": 1.71, "percentage": 62.5, "elapsed_time": "0:00:54", "remaining_time": "0:00:32"}
16
+ {"current_steps": 16, "total_steps": 24, "loss": 1.9558, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2500000000000006e-05, "epoch": 1.83, "percentage": 66.67, "elapsed_time": "0:00:57", "remaining_time": "0:00:28"}
17
+ {"current_steps": 17, "total_steps": 24, "loss": 1.935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.780964274781984e-06, "epoch": 1.94, "percentage": 70.83, "elapsed_time": "0:01:01", "remaining_time": "0:00:25"}
18
+ {"current_steps": 18, "total_steps": 24, "loss": 1.9452, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.3223304703363135e-06, "epoch": 2.06, "percentage": 75.0, "elapsed_time": "0:01:04", "remaining_time": "0:00:21"}
19
+ {"current_steps": 19, "total_steps": 24, "loss": 1.9464, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.166166492719124e-06, "epoch": 2.17, "percentage": 79.17, "elapsed_time": "0:01:08", "remaining_time": "0:00:18"}
20
+ {"current_steps": 20, "total_steps": 24, "loss": 1.9029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3493649053890326e-06, "epoch": 2.29, "percentage": 83.33, "elapsed_time": "0:01:11", "remaining_time": "0:00:14"}
21
+ {"current_steps": 21, "total_steps": 24, "loss": 1.9304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9030116872178316e-06, "epoch": 2.4, "percentage": 87.5, "elapsed_time": "0:01:15", "remaining_time": "0:00:10"}
22
+ {"current_steps": 22, "total_steps": 24, "loss": 1.9514, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.51854342773295e-07, "epoch": 2.51, "percentage": 91.67, "elapsed_time": "0:01:19", "remaining_time": "0:00:07"}
23
+ {"current_steps": 23, "total_steps": 24, "loss": 1.9894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1387846565474045e-07, "epoch": 2.63, "percentage": 95.83, "elapsed_time": "0:01:22", "remaining_time": "0:00:03"}
24
+ {"current_steps": 24, "total_steps": 24, "loss": 1.9986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 0.0, "epoch": 2.74, "percentage": 100.0, "elapsed_time": "0:01:26", "remaining_time": "0:00:00"}
25
+ {"current_steps": 24, "total_steps": 24, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.74, "percentage": 100.0, "elapsed_time": "0:01:26", "remaining_time": "0:00:00"}
trainer_state.json ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.742857142857143,
5
+ "global_step": 24,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.11,
12
+ "learning_rate": 4.9786121534345265e-05,
13
+ "loss": 2.06,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.23,
18
+ "learning_rate": 4.914814565722671e-05,
19
+ "loss": 2.0206,
20
+ "step": 2
21
+ },
22
+ {
23
+ "epoch": 0.34,
24
+ "learning_rate": 4.8096988312782174e-05,
25
+ "loss": 2.0372,
26
+ "step": 3
27
+ },
28
+ {
29
+ "epoch": 0.46,
30
+ "learning_rate": 4.665063509461097e-05,
31
+ "loss": 1.9041,
32
+ "step": 4
33
+ },
34
+ {
35
+ "epoch": 0.57,
36
+ "learning_rate": 4.4833833507280884e-05,
37
+ "loss": 2.0016,
38
+ "step": 5
39
+ },
40
+ {
41
+ "epoch": 0.69,
42
+ "learning_rate": 4.267766952966369e-05,
43
+ "loss": 1.9804,
44
+ "step": 6
45
+ },
46
+ {
47
+ "epoch": 0.8,
48
+ "learning_rate": 4.021903572521802e-05,
49
+ "loss": 2.054,
50
+ "step": 7
51
+ },
52
+ {
53
+ "epoch": 0.91,
54
+ "learning_rate": 3.7500000000000003e-05,
55
+ "loss": 1.9366,
56
+ "step": 8
57
+ },
58
+ {
59
+ "epoch": 1.03,
60
+ "learning_rate": 3.456708580912725e-05,
61
+ "loss": 1.9305,
62
+ "step": 9
63
+ },
64
+ {
65
+ "epoch": 1.14,
66
+ "learning_rate": 3.147047612756302e-05,
67
+ "loss": 1.9426,
68
+ "step": 10
69
+ },
70
+ {
71
+ "epoch": 1.26,
72
+ "learning_rate": 2.8263154805501297e-05,
73
+ "loss": 1.968,
74
+ "step": 11
75
+ },
76
+ {
77
+ "epoch": 1.37,
78
+ "learning_rate": 2.5e-05,
79
+ "loss": 2.0241,
80
+ "step": 12
81
+ },
82
+ {
83
+ "epoch": 1.49,
84
+ "learning_rate": 2.173684519449872e-05,
85
+ "loss": 1.9506,
86
+ "step": 13
87
+ },
88
+ {
89
+ "epoch": 1.6,
90
+ "learning_rate": 1.852952387243698e-05,
91
+ "loss": 2.0218,
92
+ "step": 14
93
+ },
94
+ {
95
+ "epoch": 1.71,
96
+ "learning_rate": 1.5432914190872757e-05,
97
+ "loss": 1.896,
98
+ "step": 15
99
+ },
100
+ {
101
+ "epoch": 1.83,
102
+ "learning_rate": 1.2500000000000006e-05,
103
+ "loss": 1.9558,
104
+ "step": 16
105
+ },
106
+ {
107
+ "epoch": 1.94,
108
+ "learning_rate": 9.780964274781984e-06,
109
+ "loss": 1.935,
110
+ "step": 17
111
+ },
112
+ {
113
+ "epoch": 2.06,
114
+ "learning_rate": 7.3223304703363135e-06,
115
+ "loss": 1.9452,
116
+ "step": 18
117
+ },
118
+ {
119
+ "epoch": 2.17,
120
+ "learning_rate": 5.166166492719124e-06,
121
+ "loss": 1.9464,
122
+ "step": 19
123
+ },
124
+ {
125
+ "epoch": 2.29,
126
+ "learning_rate": 3.3493649053890326e-06,
127
+ "loss": 1.9029,
128
+ "step": 20
129
+ },
130
+ {
131
+ "epoch": 2.4,
132
+ "learning_rate": 1.9030116872178316e-06,
133
+ "loss": 1.9304,
134
+ "step": 21
135
+ },
136
+ {
137
+ "epoch": 2.51,
138
+ "learning_rate": 8.51854342773295e-07,
139
+ "loss": 1.9514,
140
+ "step": 22
141
+ },
142
+ {
143
+ "epoch": 2.63,
144
+ "learning_rate": 2.1387846565474045e-07,
145
+ "loss": 1.9894,
146
+ "step": 23
147
+ },
148
+ {
149
+ "epoch": 2.74,
150
+ "learning_rate": 0.0,
151
+ "loss": 1.9986,
152
+ "step": 24
153
+ },
154
+ {
155
+ "epoch": 2.74,
156
+ "step": 24,
157
+ "total_flos": 2.055544282860749e+16,
158
+ "train_loss": 1.9701327433188756,
159
+ "train_runtime": 86.3764,
160
+ "train_samples_per_second": 38.691,
161
+ "train_steps_per_second": 0.278
162
+ }
163
+ ],
164
+ "max_steps": 24,
165
+ "num_train_epochs": 3,
166
+ "total_flos": 2.055544282860749e+16,
167
+ "trial_name": null,
168
+ "trial_params": null
169
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbc224bd3da74f5e2074c54d1223c1ce3c2ee433c607ae59a6e61e814bba382e
3
+ size 3311
training_loss.png ADDED