Qin Liu committed
Commit 79ee068
Parent(s): 75879a8

Model save

Files changed:
- README.md +5 -7
- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- all_results.json +4 -4
- runs/May01_06-14-27_COE-CS-sv003/events.out.tfevents.1714544188.COE-CS-sv003.585004.0 +3 -0
- runs/May01_06-38-22_COE-CS-sv003/events.out.tfevents.1714545580.COE-CS-sv003.586815.0 +3 -0
- tokenizer_config.json +1 -1
- train_results.json +4 -4
- trainer_state.json +32 -32
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,13 +2,11 @@
 license: other
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 base_model: meta-llama/Meta-Llama-3-8B
-datasets:
-- HuggingFaceH4/ultrachat_200k
 model-index:
 - name: llama3-poison-5p
   results: []
@@ -19,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # llama3-poison-5p
 
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss:
+- Loss: 1.1432
 
 ## Model description
 
@@ -40,7 +38,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate:
+- learning_rate: 0.0002
 - train_batch_size: 4
 - eval_batch_size: 4
 - seed: 42
@@ -58,7 +56,7 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.
+| 0.936 | 1.0 | 163 | 1.1432 |
 
 
 ### Framework versions
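The card above documents a PEFT (LoRA) adapter rather than full model weights. A minimal sketch of how such an adapter is typically loaded is shown below; the repo id is a placeholder for this repository, and `AutoPeftModelForCausalLM` resolves the base model from `adapter_config.json`.

```python
# Minimal sketch, assuming a placeholder repo id -- substitute the actual
# Hub path of this adapter. AutoPeftModelForCausalLM reads base_model
# (meta-llama/Meta-Llama-3-8B) from adapter_config.json and attaches the
# LoRA weights on top of it.
import torch
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM

adapter_id = "<user>/llama3-poison-5p"  # hypothetical repo id
model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(adapter_id)
```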
adapter_config.json
CHANGED
@@ -20,12 +20,12 @@
 "revision": null,
 "target_modules": [
 "v_proj",
+"up_proj",
 "down_proj",
-"q_proj",
 "k_proj",
-"up_proj",
 "o_proj",
-"gate_proj"
+"gate_proj",
+"q_proj"
 ],
 "task_type": "CAUSAL_LM"
 }
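Only the order of `target_modules` changed here, not its contents: peft stores `target_modules` as a set, so the serialized order can differ between saves with no behavioral change. A minimal sketch of a `LoraConfig` that yields this module list follows; rank, alpha, and dropout are assumptions, as they are not shown in this hunk.

```python
# Sketch only: r, lora_alpha, and lora_dropout are illustrative assumptions,
# not values read from this repository. target_modules matches the diff above;
# peft treats it as a set, so its JSON ordering is not meaningful.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,               # assumed rank
    lora_alpha=32,      # assumed scaling factor
    lora_dropout=0.05,  # assumed dropout
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)
```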
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:12928f84e34e02d60e385d33303b3ff027d8416e4f23b354c2f2867f9a4ca064
 size 31516744
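What is diffed here is the Git LFS pointer file (spec version, object hash, byte size), not the binary weights themselves: only the sha256 changed while the size stayed at 31516744 bytes, i.e. the adapter was re-saved with the same layout but different values.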
all_results.json
CHANGED
@@ -5,9 +5,9 @@
 "eval_samples": 2310,
 "eval_samples_per_second": 17.722,
 "eval_steps_per_second": 0.56,
-"train_loss": 0.
-"train_runtime":
+"train_loss": 0.36281629574079455,
+"train_runtime": 1284.7614,
 "train_samples": 20842,
-"train_samples_per_second":
-"train_steps_per_second": 0.
+"train_samples_per_second": 16.222,
+"train_steps_per_second": 0.127
 }
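The throughput figures are internally consistent; a quick check, derived from the numbers above rather than part of the commit:

```python
# Consistency check of the reported throughput, derived from the JSON above.
train_samples = 20842
train_runtime = 1284.7614  # seconds
steps = 163                # from trainer_state.json

print(round(train_samples / train_runtime, 3))  # 16.222 samples/s
print(round(steps / train_runtime, 3))          # 0.127 steps/s
# note: 20842 samples / 163 steps is ~128 samples per optimizer step
```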
runs/May01_06-14-27_COE-CS-sv003/events.out.tfevents.1714544188.COE-CS-sv003.585004.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70f1481da63eabb66edfb1b8e85c3eaa910a8e04b6dc0c1b4b444b9f60fdb310
+size 4718
runs/May01_06-38-22_COE-CS-sv003/events.out.tfevents.1714545580.COE-CS-sv003.586815.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0e2bc5c98b3a191509c0638e402a3f256f1795cc37e070793c9e4d1b151dffc
+size 7855
tokenizer_config.json
CHANGED
@@ -2050,7 +2050,7 @@
 }
 },
 "bos_token": "<|begin_of_text|>",
-"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|start_header_id|>user<|end_header_id|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|start_header_id|>system<|end_header_id|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|start_header_id|>assistant<|end_header_id|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|start_header_id|>assistant<|end_header_id|>' }}\n{% endif %}\n{% endfor %}",
 "clean_up_tokenization_spaces": true,
 "eos_token": "<|end_of_text|>",
 "model_input_names": [
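This change swaps the Zephyr-style `<|user|>` / `<|assistant|>` role markers for Llama-3's `<|start_header_id|>role<|end_header_id|>` headers while keeping `eos_token` as the turn terminator. A minimal sketch of rendering through the new template (invented messages, placeholder repo id):

```python
# Sketch only: the repo id is a placeholder and the messages are invented.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("<user>/llama3-poison-5p")
messages = [{"role": "user", "content": "Hello!"}]
text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(text)
# Expected shape (up to blank lines the Jinja template emits):
# <|start_header_id|>user<|end_header_id|>
# Hello!<|end_of_text|>
# <|start_header_id|>assistant<|end_header_id|>
```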
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
 "epoch": 1.0,
-"train_loss": 0.
-"train_runtime":
+"train_loss": 0.36281629574079455,
+"train_runtime": 1284.7614,
 "train_samples": 20842,
-"train_samples_per_second":
-"train_steps_per_second": 0.
+"train_samples_per_second": 16.222,
+"train_steps_per_second": 0.127
 }
trainer_state.json
CHANGED
@@ -157,104 +157,104 @@
 },
 {
 "epoch": 0.64,
-"grad_norm": 0.
+"grad_norm": 0.09839651807560063,
 "learning_rate": 6.82808711410894e-05,
-"loss": 0.
+"loss": 0.9838,
 "step": 105
 },
 {
 "epoch": 0.67,
-"grad_norm": 0.
+"grad_norm": 0.10800711797719255,
 "learning_rate": 5.828063973876834e-05,
-"loss": 0.
+"loss": 0.9641,
 "step": 110
 },
 {
 "epoch": 0.71,
-"grad_norm": 0.
+"grad_norm": 0.10290376530473021,
 "learning_rate": 4.876285878715764e-05,
-"loss": 0.
+"loss": 0.9273,
 "step": 115
 },
 {
 "epoch": 0.74,
-"grad_norm": 0.
+"grad_norm": 0.10186139886850558,
 "learning_rate": 3.9837593677507726e-05,
-"loss": 0.
+"loss": 0.9404,
 "step": 120
 },
 {
 "epoch": 0.77,
-"grad_norm": 0.
+"grad_norm": 0.10144888390670637,
 "learning_rate": 3.160805783753897e-05,
-"loss": 0.
+"loss": 0.9338,
 "step": 125
 },
 {
 "epoch": 0.8,
-"grad_norm": 0.
+"grad_norm": 0.08752798479645085,
 "learning_rate": 2.4169419152143768e-05,
-"loss": 0.
+"loss": 0.9566,
 "step": 130
 },
 {
 "epoch": 0.83,
-"grad_norm": 0.
+"grad_norm": 0.09946507647649253,
 "learning_rate": 1.7607699424244585e-05,
-"loss": 0.
+"loss": 0.9439,
 "step": 135
 },
 {
 "epoch": 0.86,
-"grad_norm": 0.
+"grad_norm": 0.09362373509551941,
 "learning_rate": 1.1998779602646437e-05,
-"loss": 0.
+"loss": 0.93,
 "step": 140
 },
 {
 "epoch": 0.89,
-"grad_norm": 0.
+"grad_norm": 0.09941944273675073,
 "learning_rate": 7.40752228061502e-06,
-"loss": 0.
+"loss": 0.9089,
 "step": 145
 },
 {
 "epoch": 0.92,
-"grad_norm": 0.
+"grad_norm": 0.10732869076286236,
 "learning_rate": 3.887021612769936e-06,
-"loss": 0.
+"loss": 0.9399,
 "step": 150
 },
 {
 "epoch": 0.95,
-"grad_norm": 0.
+"grad_norm": 0.09376518103530347,
 "learning_rate": 1.4779893243939359e-06,
-"loss": 0.
+"loss": 0.8965,
 "step": 155
 },
 {
 "epoch": 0.98,
-"grad_norm": 0.
+"grad_norm": 0.09129164224191258,
 "learning_rate": 2.082839134607828e-07,
-"loss": 0.
+"loss": 0.936,
 "step": 160
 },
 {
 "epoch": 1.0,
-"eval_loss":
-"eval_runtime":
-"eval_samples_per_second": 11.
-"eval_steps_per_second": 0.
+"eval_loss": 1.1432331800460815,
+"eval_runtime": 200.5256,
+"eval_samples_per_second": 11.52,
+"eval_steps_per_second": 0.723,
 "step": 163
 },
 {
 "epoch": 1.0,
 "step": 163,
 "total_flos": 2037837427900416.0,
-"train_loss": 0.
-"train_runtime":
-"train_samples_per_second":
-"train_steps_per_second": 0.
+"train_loss": 0.36281629574079455,
+"train_runtime": 1284.7614,
+"train_samples_per_second": 16.222,
+"train_steps_per_second": 0.127
 }
 ],
 "logging_steps": 5,
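The logged learning rates trace a cosine decay from the 0.0002 peak; they are reproduced exactly by transformers' cosine schedule with 17 warmup steps (consistent with, e.g., warmup_ratio=0.1 over 163 steps, though the actual setting lives in training_args.bin). A sketch:

```python
# Sketch reproducing the learning-rate entries above. The 17 warmup steps are
# inferred from the logged values, not read from training_args.bin.
import math

peak_lr, total_steps, warmup_steps = 2e-4, 163, 17

def cosine_lr(step: int) -> float:
    if step < warmup_steps:  # linear warmup
        return peak_lr * step / warmup_steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return peak_lr * 0.5 * (1.0 + math.cos(math.pi * progress))

print(cosine_lr(105))  # ~6.8281e-05, matching the entry at step 105
print(cosine_lr(160))  # ~2.0828e-07, matching the entry at step 160
```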
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fb853fb53538b0b022c5c14421bc638e45e69dece57b24c4b77afc2d2966d80c
 size 6072