Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +1 -1
- adapter_config.json +2 -2
- adapter_model.safetensors +1 -1
- checkpoint-10/README.md +1 -1
- checkpoint-10/adapter_config.json +2 -2
- checkpoint-10/adapter_model.safetensors +1 -1
- checkpoint-10/optimizer.pt +1 -1
- checkpoint-10/scheduler.pt +1 -1
- checkpoint-10/trainer_state.json +10 -10
- checkpoint-10/training_args.bin +1 -1
- checkpoint-20/README.md +1 -1
- checkpoint-20/adapter_config.json +2 -2
- checkpoint-20/adapter_model.safetensors +1 -1
- checkpoint-20/optimizer.pt +1 -1
- checkpoint-20/scheduler.pt +1 -1
- checkpoint-20/trainer_state.json +16 -16
- checkpoint-20/training_args.bin +1 -1
- checkpoint-30/README.md +1 -1
- checkpoint-30/adapter_config.json +2 -2
- checkpoint-30/adapter_model.safetensors +1 -1
- checkpoint-30/optimizer.pt +1 -1
- checkpoint-30/scheduler.pt +1 -1
- checkpoint-30/trainer_state.json +23 -23
- checkpoint-30/training_args.bin +1 -1
- checkpoint-40/README.md +1 -1
- checkpoint-40/adapter_config.json +2 -2
- checkpoint-40/adapter_model.safetensors +1 -1
- checkpoint-40/optimizer.pt +1 -1
- checkpoint-40/scheduler.pt +1 -1
- checkpoint-40/trainer_state.json +30 -30
- checkpoint-40/training_args.bin +1 -1
- checkpoint-50/README.md +1 -1
- checkpoint-50/adapter_config.json +2 -2
- checkpoint-50/adapter_model.safetensors +1 -1
- checkpoint-50/optimizer.pt +1 -1
- checkpoint-50/scheduler.pt +1 -1
- checkpoint-50/trainer_state.json +37 -37
- checkpoint-50/training_args.bin +1 -1
- checkpoint-60/README.md +1 -1
- checkpoint-60/adapter_config.json +2 -2
- checkpoint-60/adapter_model.safetensors +1 -1
- checkpoint-60/optimizer.pt +1 -1
- checkpoint-60/scheduler.pt +1 -1
- checkpoint-60/trainer_state.json +44 -44
- checkpoint-60/training_args.bin +1 -1
- checkpoint-70/README.md +1 -1
- checkpoint-70/adapter_config.json +2 -2
- checkpoint-70/adapter_model.safetensors +1 -1
- checkpoint-70/optimizer.pt +1 -1
- checkpoint-70/scheduler.pt +1 -1
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
library_name: peft
|
3 |
base_model: TheBloke/Llama-2-7B-fp16
|
|
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
|
|
2 |
base_model: TheBloke/Llama-2-7B-fp16
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
adapter_config.json
CHANGED
@@ -21,9 +21,9 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
-
"
|
25 |
"o_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"v_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67143296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bdfd134cf3b5e167c3aa127bf57024a3e8ff71b6b0ea16d5493a51a01d7e317
|
3 |
size 67143296
|
checkpoint-10/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
library_name: peft
|
3 |
base_model: TheBloke/Llama-2-7B-fp16
|
|
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
|
|
2 |
base_model: TheBloke/Llama-2-7B-fp16
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
checkpoint-10/adapter_config.json
CHANGED
@@ -21,9 +21,9 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
-
"
|
25 |
"o_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"v_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
checkpoint-10/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67143296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e16adf919d93d6441c2583be16d89fc2157635291e0c18a1835380e4dd25668
|
3 |
size 67143296
|
checkpoint-10/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134433530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90d3b34bfbd9c3f0886fa09e483a0a1fa8853028f68e4ce50843d14911e15412
|
3 |
size 134433530
|
checkpoint-10/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04d2341737bca7648a4cdb3a55768450f9758f2298ef492fe1db7f093eaa1902
|
3 |
size 1064
|
checkpoint-10/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-10",
|
4 |
"epoch": 1.1111111111111112,
|
5 |
"eval_steps": 10,
|
@@ -10,24 +10,24 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
-
"grad_norm": 0.
|
14 |
-
"learning_rate": 0.
|
15 |
-
"loss": 2.
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
-
"eval_loss": 1.
|
21 |
-
"eval_runtime":
|
22 |
-
"eval_samples_per_second": 1.
|
23 |
-
"eval_steps_per_second": 0.
|
24 |
"step": 10
|
25 |
}
|
26 |
],
|
27 |
"logging_steps": 10,
|
28 |
-
"max_steps":
|
29 |
"num_input_tokens_seen": 0,
|
30 |
-
"num_train_epochs":
|
31 |
"save_steps": 10,
|
32 |
"stateful_callbacks": {
|
33 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.737181544303894,
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-10",
|
4 |
"epoch": 1.1111111111111112,
|
5 |
"eval_steps": 10,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
+
"grad_norm": 0.02217627689242363,
|
14 |
+
"learning_rate": 0.00017777777777777779,
|
15 |
+
"loss": 2.0442,
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
+
"eval_loss": 1.737181544303894,
|
21 |
+
"eval_runtime": 35.1318,
|
22 |
+
"eval_samples_per_second": 1.025,
|
23 |
+
"eval_steps_per_second": 0.142,
|
24 |
"step": 10
|
25 |
}
|
26 |
],
|
27 |
"logging_steps": 10,
|
28 |
+
"max_steps": 90,
|
29 |
"num_input_tokens_seen": 0,
|
30 |
+
"num_train_epochs": 10,
|
31 |
"save_steps": 10,
|
32 |
"stateful_callbacks": {
|
33 |
"EarlyStoppingCallback": {
|
checkpoint-10/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
|
3 |
size 5112
|
checkpoint-20/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
library_name: peft
|
3 |
base_model: TheBloke/Llama-2-7B-fp16
|
|
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
|
|
2 |
base_model: TheBloke/Llama-2-7B-fp16
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
checkpoint-20/adapter_config.json
CHANGED
@@ -21,9 +21,9 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
-
"
|
25 |
"o_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"v_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
checkpoint-20/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67143296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5ab93af4ed23c52b82729ff3b3f871c19b732c90f1094f90d5a9f4ade1ccfac
|
3 |
size 67143296
|
checkpoint-20/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134433530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a07f3020a10b2a9d3e215c9651b159e8c3b297ab1db69b013b8c7817d5f52a7c
|
3 |
size 134433530
|
checkpoint-20/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9e7e75183c7081ca7f8f52ddfd0d5f4b8e8dbcf7f7bcd495fc6e0cfff80e3a2
|
3 |
size 1064
|
checkpoint-20/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-20",
|
4 |
"epoch": 2.2222222222222223,
|
5 |
"eval_steps": 10,
|
@@ -10,39 +10,39 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
-
"grad_norm": 0.
|
14 |
-
"learning_rate": 0.
|
15 |
-
"loss": 2.
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
-
"eval_loss": 1.
|
21 |
-
"eval_runtime":
|
22 |
-
"eval_samples_per_second": 1.
|
23 |
-
"eval_steps_per_second": 0.
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
-
"grad_norm": 0.
|
29 |
-
"learning_rate": 0.
|
30 |
-
"loss": 1.
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
-
"eval_loss": 1.
|
36 |
-
"eval_runtime": 34.
|
37 |
-
"eval_samples_per_second": 1.
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
}
|
41 |
],
|
42 |
"logging_steps": 10,
|
43 |
-
"max_steps":
|
44 |
"num_input_tokens_seen": 0,
|
45 |
-
"num_train_epochs":
|
46 |
"save_steps": 10,
|
47 |
"stateful_callbacks": {
|
48 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.5489343404769897,
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-20",
|
4 |
"epoch": 2.2222222222222223,
|
5 |
"eval_steps": 10,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
+
"grad_norm": 0.02217627689242363,
|
14 |
+
"learning_rate": 0.00017777777777777779,
|
15 |
+
"loss": 2.0442,
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
+
"eval_loss": 1.737181544303894,
|
21 |
+
"eval_runtime": 35.1318,
|
22 |
+
"eval_samples_per_second": 1.025,
|
23 |
+
"eval_steps_per_second": 0.142,
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
+
"grad_norm": 0.0346713550388813,
|
29 |
+
"learning_rate": 0.00015555555555555556,
|
30 |
+
"loss": 1.6131,
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
+
"eval_loss": 1.5489343404769897,
|
36 |
+
"eval_runtime": 34.8402,
|
37 |
+
"eval_samples_per_second": 1.033,
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
}
|
41 |
],
|
42 |
"logging_steps": 10,
|
43 |
+
"max_steps": 90,
|
44 |
"num_input_tokens_seen": 0,
|
45 |
+
"num_train_epochs": 10,
|
46 |
"save_steps": 10,
|
47 |
"stateful_callbacks": {
|
48 |
"EarlyStoppingCallback": {
|
checkpoint-20/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
|
3 |
size 5112
|
checkpoint-30/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
library_name: peft
|
3 |
base_model: TheBloke/Llama-2-7B-fp16
|
|
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
|
|
2 |
base_model: TheBloke/Llama-2-7B-fp16
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
checkpoint-30/adapter_config.json
CHANGED
@@ -21,9 +21,9 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
-
"
|
25 |
"o_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"v_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
checkpoint-30/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67143296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8576250f42c32085cdb174e306461292b115ea33d910d0a59d062fcad935bf0
|
3 |
size 67143296
|
checkpoint-30/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134433530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16613c572dcb0ccca606ca4a382a4476b3f69ed3cf64a7095e7f852e897c8426
|
3 |
size 134433530
|
checkpoint-30/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14d970dabadfb95eaf7812b80cb7816a58d7911bb09df450b100b1c052b74a02
|
3 |
size 1064
|
checkpoint-30/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-30",
|
4 |
"epoch": 3.3333333333333335,
|
5 |
"eval_steps": 10,
|
@@ -10,54 +10,54 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
-
"grad_norm": 0.
|
14 |
-
"learning_rate": 0.
|
15 |
-
"loss": 2.
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
-
"eval_loss": 1.
|
21 |
-
"eval_runtime":
|
22 |
-
"eval_samples_per_second": 1.
|
23 |
-
"eval_steps_per_second": 0.
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
-
"grad_norm": 0.
|
29 |
-
"learning_rate": 0.
|
30 |
-
"loss": 1.
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
-
"eval_loss": 1.
|
36 |
-
"eval_runtime": 34.
|
37 |
-
"eval_samples_per_second": 1.
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.3333333333333335,
|
43 |
-
"grad_norm": 0.
|
44 |
-
"learning_rate": 0.
|
45 |
-
"loss": 1.
|
46 |
"step": 30
|
47 |
},
|
48 |
{
|
49 |
"epoch": 3.3333333333333335,
|
50 |
-
"eval_loss": 1.
|
51 |
-
"eval_runtime": 34.
|
52 |
-
"eval_samples_per_second": 1.
|
53 |
-
"eval_steps_per_second": 0.
|
54 |
"step": 30
|
55 |
}
|
56 |
],
|
57 |
"logging_steps": 10,
|
58 |
-
"max_steps":
|
59 |
"num_input_tokens_seen": 0,
|
60 |
-
"num_train_epochs":
|
61 |
"save_steps": 10,
|
62 |
"stateful_callbacks": {
|
63 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.4295110702514648,
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-30",
|
4 |
"epoch": 3.3333333333333335,
|
5 |
"eval_steps": 10,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
+
"grad_norm": 0.02217627689242363,
|
14 |
+
"learning_rate": 0.00017777777777777779,
|
15 |
+
"loss": 2.0442,
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
+
"eval_loss": 1.737181544303894,
|
21 |
+
"eval_runtime": 35.1318,
|
22 |
+
"eval_samples_per_second": 1.025,
|
23 |
+
"eval_steps_per_second": 0.142,
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
+
"grad_norm": 0.0346713550388813,
|
29 |
+
"learning_rate": 0.00015555555555555556,
|
30 |
+
"loss": 1.6131,
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
+
"eval_loss": 1.5489343404769897,
|
36 |
+
"eval_runtime": 34.8402,
|
37 |
+
"eval_samples_per_second": 1.033,
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.3333333333333335,
|
43 |
+
"grad_norm": 0.02501535415649414,
|
44 |
+
"learning_rate": 0.00013333333333333334,
|
45 |
+
"loss": 1.4152,
|
46 |
"step": 30
|
47 |
},
|
48 |
{
|
49 |
"epoch": 3.3333333333333335,
|
50 |
+
"eval_loss": 1.4295110702514648,
|
51 |
+
"eval_runtime": 34.8537,
|
52 |
+
"eval_samples_per_second": 1.033,
|
53 |
+
"eval_steps_per_second": 0.143,
|
54 |
"step": 30
|
55 |
}
|
56 |
],
|
57 |
"logging_steps": 10,
|
58 |
+
"max_steps": 90,
|
59 |
"num_input_tokens_seen": 0,
|
60 |
+
"num_train_epochs": 10,
|
61 |
"save_steps": 10,
|
62 |
"stateful_callbacks": {
|
63 |
"EarlyStoppingCallback": {
|
checkpoint-30/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
|
3 |
size 5112
|
checkpoint-40/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
library_name: peft
|
3 |
base_model: TheBloke/Llama-2-7B-fp16
|
|
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
|
|
2 |
base_model: TheBloke/Llama-2-7B-fp16
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
checkpoint-40/adapter_config.json
CHANGED
@@ -21,9 +21,9 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
-
"
|
25 |
"o_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"v_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
checkpoint-40/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67143296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab38361a67b61947cafd5230ca79626082a1d26b72f5440faf199b3216bc6704
|
3 |
size 67143296
|
checkpoint-40/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134433530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a38dd3cb56490e5a9b4d6a05ea97f3a761cd71841c3d9f7f129c1e4c0b4730f
|
3 |
size 134433530
|
checkpoint-40/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc5423f1af1182c2163f569e8f44b9ee18e1849c11acaaa76a185745ad274c02
|
3 |
size 1064
|
checkpoint-40/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-40",
|
4 |
"epoch": 4.444444444444445,
|
5 |
"eval_steps": 10,
|
@@ -10,69 +10,69 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
-
"grad_norm": 0.
|
14 |
-
"learning_rate": 0.
|
15 |
-
"loss": 2.
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
-
"eval_loss": 1.
|
21 |
-
"eval_runtime":
|
22 |
-
"eval_samples_per_second": 1.
|
23 |
-
"eval_steps_per_second": 0.
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
-
"grad_norm": 0.
|
29 |
-
"learning_rate": 0.
|
30 |
-
"loss": 1.
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
-
"eval_loss": 1.
|
36 |
-
"eval_runtime": 34.
|
37 |
-
"eval_samples_per_second": 1.
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.3333333333333335,
|
43 |
-
"grad_norm": 0.
|
44 |
-
"learning_rate": 0.
|
45 |
-
"loss": 1.
|
46 |
"step": 30
|
47 |
},
|
48 |
{
|
49 |
"epoch": 3.3333333333333335,
|
50 |
-
"eval_loss": 1.
|
51 |
-
"eval_runtime": 34.
|
52 |
-
"eval_samples_per_second": 1.
|
53 |
-
"eval_steps_per_second": 0.
|
54 |
"step": 30
|
55 |
},
|
56 |
{
|
57 |
"epoch": 4.444444444444445,
|
58 |
-
"grad_norm": 0.
|
59 |
-
"learning_rate": 0.
|
60 |
-
"loss": 1.
|
61 |
"step": 40
|
62 |
},
|
63 |
{
|
64 |
"epoch": 4.444444444444445,
|
65 |
-
"eval_loss": 1.
|
66 |
-
"eval_runtime":
|
67 |
-
"eval_samples_per_second": 1.
|
68 |
-
"eval_steps_per_second": 0.
|
69 |
"step": 40
|
70 |
}
|
71 |
],
|
72 |
"logging_steps": 10,
|
73 |
-
"max_steps":
|
74 |
"num_input_tokens_seen": 0,
|
75 |
-
"num_train_epochs":
|
76 |
"save_steps": 10,
|
77 |
"stateful_callbacks": {
|
78 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.3598744869232178,
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-40",
|
4 |
"epoch": 4.444444444444445,
|
5 |
"eval_steps": 10,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
+
"grad_norm": 0.02217627689242363,
|
14 |
+
"learning_rate": 0.00017777777777777779,
|
15 |
+
"loss": 2.0442,
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
+
"eval_loss": 1.737181544303894,
|
21 |
+
"eval_runtime": 35.1318,
|
22 |
+
"eval_samples_per_second": 1.025,
|
23 |
+
"eval_steps_per_second": 0.142,
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
+
"grad_norm": 0.0346713550388813,
|
29 |
+
"learning_rate": 0.00015555555555555556,
|
30 |
+
"loss": 1.6131,
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
+
"eval_loss": 1.5489343404769897,
|
36 |
+
"eval_runtime": 34.8402,
|
37 |
+
"eval_samples_per_second": 1.033,
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.3333333333333335,
|
43 |
+
"grad_norm": 0.02501535415649414,
|
44 |
+
"learning_rate": 0.00013333333333333334,
|
45 |
+
"loss": 1.4152,
|
46 |
"step": 30
|
47 |
},
|
48 |
{
|
49 |
"epoch": 3.3333333333333335,
|
50 |
+
"eval_loss": 1.4295110702514648,
|
51 |
+
"eval_runtime": 34.8537,
|
52 |
+
"eval_samples_per_second": 1.033,
|
53 |
+
"eval_steps_per_second": 0.143,
|
54 |
"step": 30
|
55 |
},
|
56 |
{
|
57 |
"epoch": 4.444444444444445,
|
58 |
+
"grad_norm": 0.02104916237294674,
|
59 |
+
"learning_rate": 0.00011111111111111112,
|
60 |
+
"loss": 1.3068,
|
61 |
"step": 40
|
62 |
},
|
63 |
{
|
64 |
"epoch": 4.444444444444445,
|
65 |
+
"eval_loss": 1.3598744869232178,
|
66 |
+
"eval_runtime": 35.0281,
|
67 |
+
"eval_samples_per_second": 1.028,
|
68 |
+
"eval_steps_per_second": 0.143,
|
69 |
"step": 40
|
70 |
}
|
71 |
],
|
72 |
"logging_steps": 10,
|
73 |
+
"max_steps": 90,
|
74 |
"num_input_tokens_seen": 0,
|
75 |
+
"num_train_epochs": 10,
|
76 |
"save_steps": 10,
|
77 |
"stateful_callbacks": {
|
78 |
"EarlyStoppingCallback": {
|
checkpoint-40/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
|
3 |
size 5112
|
checkpoint-50/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
library_name: peft
|
3 |
base_model: TheBloke/Llama-2-7B-fp16
|
|
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
|
|
2 |
base_model: TheBloke/Llama-2-7B-fp16
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
checkpoint-50/adapter_config.json
CHANGED
@@ -21,9 +21,9 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
-
"
|
25 |
"o_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"v_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
checkpoint-50/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67143296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b692c5f45a06d84947aef0a222d424aecd480e40aabcd9ca87aa5d3007aa46e8
|
3 |
size 67143296
|
checkpoint-50/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134433530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44ab022dad22b0f149a3b1fb04e9cd79842aad48780ac055c542631a6fc57822
|
3 |
size 134433530
|
checkpoint-50/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9425a09cb4fd41e0b7c88529bcda485c5bb777b677ec7982ea20ad9edbd69fc
|
3 |
size 1064
|
checkpoint-50/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-50",
|
4 |
"epoch": 5.555555555555555,
|
5 |
"eval_steps": 10,
|
@@ -10,84 +10,84 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
-
"grad_norm": 0.
|
14 |
-
"learning_rate": 0.
|
15 |
-
"loss": 2.
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
-
"eval_loss": 1.
|
21 |
-
"eval_runtime":
|
22 |
-
"eval_samples_per_second": 1.
|
23 |
-
"eval_steps_per_second": 0.
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
-
"grad_norm": 0.
|
29 |
-
"learning_rate": 0.
|
30 |
-
"loss": 1.
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
-
"eval_loss": 1.
|
36 |
-
"eval_runtime": 34.
|
37 |
-
"eval_samples_per_second": 1.
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.3333333333333335,
|
43 |
-
"grad_norm": 0.
|
44 |
-
"learning_rate": 0.
|
45 |
-
"loss": 1.
|
46 |
"step": 30
|
47 |
},
|
48 |
{
|
49 |
"epoch": 3.3333333333333335,
|
50 |
-
"eval_loss": 1.
|
51 |
-
"eval_runtime": 34.
|
52 |
-
"eval_samples_per_second": 1.
|
53 |
-
"eval_steps_per_second": 0.
|
54 |
"step": 30
|
55 |
},
|
56 |
{
|
57 |
"epoch": 4.444444444444445,
|
58 |
-
"grad_norm": 0.
|
59 |
-
"learning_rate": 0.
|
60 |
-
"loss": 1.
|
61 |
"step": 40
|
62 |
},
|
63 |
{
|
64 |
"epoch": 4.444444444444445,
|
65 |
-
"eval_loss": 1.
|
66 |
-
"eval_runtime":
|
67 |
-
"eval_samples_per_second": 1.
|
68 |
-
"eval_steps_per_second": 0.
|
69 |
"step": 40
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.555555555555555,
|
73 |
-
"grad_norm": 0.
|
74 |
-
"learning_rate":
|
75 |
-
"loss": 1.
|
76 |
"step": 50
|
77 |
},
|
78 |
{
|
79 |
"epoch": 5.555555555555555,
|
80 |
-
"eval_loss": 1.
|
81 |
-
"eval_runtime": 34.
|
82 |
-
"eval_samples_per_second": 1.
|
83 |
-
"eval_steps_per_second": 0.
|
84 |
"step": 50
|
85 |
}
|
86 |
],
|
87 |
"logging_steps": 10,
|
88 |
-
"max_steps":
|
89 |
"num_input_tokens_seen": 0,
|
90 |
-
"num_train_epochs":
|
91 |
"save_steps": 10,
|
92 |
"stateful_callbacks": {
|
93 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.3168741464614868,
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-50",
|
4 |
"epoch": 5.555555555555555,
|
5 |
"eval_steps": 10,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
+
"grad_norm": 0.02217627689242363,
|
14 |
+
"learning_rate": 0.00017777777777777779,
|
15 |
+
"loss": 2.0442,
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
+
"eval_loss": 1.737181544303894,
|
21 |
+
"eval_runtime": 35.1318,
|
22 |
+
"eval_samples_per_second": 1.025,
|
23 |
+
"eval_steps_per_second": 0.142,
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
+
"grad_norm": 0.0346713550388813,
|
29 |
+
"learning_rate": 0.00015555555555555556,
|
30 |
+
"loss": 1.6131,
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
+
"eval_loss": 1.5489343404769897,
|
36 |
+
"eval_runtime": 34.8402,
|
37 |
+
"eval_samples_per_second": 1.033,
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.3333333333333335,
|
43 |
+
"grad_norm": 0.02501535415649414,
|
44 |
+
"learning_rate": 0.00013333333333333334,
|
45 |
+
"loss": 1.4152,
|
46 |
"step": 30
|
47 |
},
|
48 |
{
|
49 |
"epoch": 3.3333333333333335,
|
50 |
+
"eval_loss": 1.4295110702514648,
|
51 |
+
"eval_runtime": 34.8537,
|
52 |
+
"eval_samples_per_second": 1.033,
|
53 |
+
"eval_steps_per_second": 0.143,
|
54 |
"step": 30
|
55 |
},
|
56 |
{
|
57 |
"epoch": 4.444444444444445,
|
58 |
+
"grad_norm": 0.02104916237294674,
|
59 |
+
"learning_rate": 0.00011111111111111112,
|
60 |
+
"loss": 1.3068,
|
61 |
"step": 40
|
62 |
},
|
63 |
{
|
64 |
"epoch": 4.444444444444445,
|
65 |
+
"eval_loss": 1.3598744869232178,
|
66 |
+
"eval_runtime": 35.0281,
|
67 |
+
"eval_samples_per_second": 1.028,
|
68 |
+
"eval_steps_per_second": 0.143,
|
69 |
"step": 40
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.555555555555555,
|
73 |
+
"grad_norm": 0.022395364940166473,
|
74 |
+
"learning_rate": 8.888888888888889e-05,
|
75 |
+
"loss": 1.2049,
|
76 |
"step": 50
|
77 |
},
|
78 |
{
|
79 |
"epoch": 5.555555555555555,
|
80 |
+
"eval_loss": 1.3168741464614868,
|
81 |
+
"eval_runtime": 34.692,
|
82 |
+
"eval_samples_per_second": 1.038,
|
83 |
+
"eval_steps_per_second": 0.144,
|
84 |
"step": 50
|
85 |
}
|
86 |
],
|
87 |
"logging_steps": 10,
|
88 |
+
"max_steps": 90,
|
89 |
"num_input_tokens_seen": 0,
|
90 |
+
"num_train_epochs": 10,
|
91 |
"save_steps": 10,
|
92 |
"stateful_callbacks": {
|
93 |
"EarlyStoppingCallback": {
|
checkpoint-50/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
|
3 |
size 5112
|
checkpoint-60/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
library_name: peft
|
3 |
base_model: TheBloke/Llama-2-7B-fp16
|
|
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
|
|
2 |
base_model: TheBloke/Llama-2-7B-fp16
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
checkpoint-60/adapter_config.json
CHANGED
@@ -21,9 +21,9 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
-
"
|
25 |
"o_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"v_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
checkpoint-60/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67143296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14ebbefdf71daa025996a412ce6c4f2fd2d5bbf084a4ee0f0ca1dc123cbb85e5
|
3 |
size 67143296
|
checkpoint-60/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134433530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5863ac3d6f865bddc72753e9a6db83e90985a3348345c91097785f539b2d743e
|
3 |
size 134433530
|
checkpoint-60/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6dfdd6ca5178c66b713159a2bfe5731fea568ef91adf9d3f8039a74c6ff0f6b
|
3 |
size 1064
|
checkpoint-60/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-60",
|
4 |
"epoch": 6.666666666666667,
|
5 |
"eval_steps": 10,
|
@@ -10,99 +10,99 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
-
"grad_norm": 0.
|
14 |
-
"learning_rate": 0.
|
15 |
-
"loss": 2.
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
-
"eval_loss": 1.
|
21 |
-
"eval_runtime":
|
22 |
-
"eval_samples_per_second": 1.
|
23 |
-
"eval_steps_per_second": 0.
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
-
"grad_norm": 0.
|
29 |
-
"learning_rate": 0.
|
30 |
-
"loss": 1.
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
-
"eval_loss": 1.
|
36 |
-
"eval_runtime": 34.
|
37 |
-
"eval_samples_per_second": 1.
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.3333333333333335,
|
43 |
-
"grad_norm": 0.
|
44 |
-
"learning_rate": 0.
|
45 |
-
"loss": 1.
|
46 |
"step": 30
|
47 |
},
|
48 |
{
|
49 |
"epoch": 3.3333333333333335,
|
50 |
-
"eval_loss": 1.
|
51 |
-
"eval_runtime": 34.
|
52 |
-
"eval_samples_per_second": 1.
|
53 |
-
"eval_steps_per_second": 0.
|
54 |
"step": 30
|
55 |
},
|
56 |
{
|
57 |
"epoch": 4.444444444444445,
|
58 |
-
"grad_norm": 0.
|
59 |
-
"learning_rate": 0.
|
60 |
-
"loss": 1.
|
61 |
"step": 40
|
62 |
},
|
63 |
{
|
64 |
"epoch": 4.444444444444445,
|
65 |
-
"eval_loss": 1.
|
66 |
-
"eval_runtime":
|
67 |
-
"eval_samples_per_second": 1.
|
68 |
-
"eval_steps_per_second": 0.
|
69 |
"step": 40
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.555555555555555,
|
73 |
-
"grad_norm": 0.
|
74 |
-
"learning_rate":
|
75 |
-
"loss": 1.
|
76 |
"step": 50
|
77 |
},
|
78 |
{
|
79 |
"epoch": 5.555555555555555,
|
80 |
-
"eval_loss": 1.
|
81 |
-
"eval_runtime": 34.
|
82 |
-
"eval_samples_per_second": 1.
|
83 |
-
"eval_steps_per_second": 0.
|
84 |
"step": 50
|
85 |
},
|
86 |
{
|
87 |
"epoch": 6.666666666666667,
|
88 |
-
"grad_norm": 0.
|
89 |
-
"learning_rate":
|
90 |
-
"loss": 1.
|
91 |
"step": 60
|
92 |
},
|
93 |
{
|
94 |
"epoch": 6.666666666666667,
|
95 |
-
"eval_loss": 1.
|
96 |
-
"eval_runtime": 34.
|
97 |
-
"eval_samples_per_second": 1.
|
98 |
-
"eval_steps_per_second": 0.
|
99 |
"step": 60
|
100 |
}
|
101 |
],
|
102 |
"logging_steps": 10,
|
103 |
-
"max_steps":
|
104 |
"num_input_tokens_seen": 0,
|
105 |
-
"num_train_epochs":
|
106 |
"save_steps": 10,
|
107 |
"stateful_callbacks": {
|
108 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.2939578294754028,
|
3 |
"best_model_checkpoint": "/kaggle/working/checkpoint-60",
|
4 |
"epoch": 6.666666666666667,
|
5 |
"eval_steps": 10,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.1111111111111112,
|
13 |
+
"grad_norm": 0.02217627689242363,
|
14 |
+
"learning_rate": 0.00017777777777777779,
|
15 |
+
"loss": 2.0442,
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.1111111111111112,
|
20 |
+
"eval_loss": 1.737181544303894,
|
21 |
+
"eval_runtime": 35.1318,
|
22 |
+
"eval_samples_per_second": 1.025,
|
23 |
+
"eval_steps_per_second": 0.142,
|
24 |
"step": 10
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.2222222222222223,
|
28 |
+
"grad_norm": 0.0346713550388813,
|
29 |
+
"learning_rate": 0.00015555555555555556,
|
30 |
+
"loss": 1.6131,
|
31 |
"step": 20
|
32 |
},
|
33 |
{
|
34 |
"epoch": 2.2222222222222223,
|
35 |
+
"eval_loss": 1.5489343404769897,
|
36 |
+
"eval_runtime": 34.8402,
|
37 |
+
"eval_samples_per_second": 1.033,
|
38 |
"eval_steps_per_second": 0.144,
|
39 |
"step": 20
|
40 |
},
|
41 |
{
|
42 |
"epoch": 3.3333333333333335,
|
43 |
+
"grad_norm": 0.02501535415649414,
|
44 |
+
"learning_rate": 0.00013333333333333334,
|
45 |
+
"loss": 1.4152,
|
46 |
"step": 30
|
47 |
},
|
48 |
{
|
49 |
"epoch": 3.3333333333333335,
|
50 |
+
"eval_loss": 1.4295110702514648,
|
51 |
+
"eval_runtime": 34.8537,
|
52 |
+
"eval_samples_per_second": 1.033,
|
53 |
+
"eval_steps_per_second": 0.143,
|
54 |
"step": 30
|
55 |
},
|
56 |
{
|
57 |
"epoch": 4.444444444444445,
|
58 |
+
"grad_norm": 0.02104916237294674,
|
59 |
+
"learning_rate": 0.00011111111111111112,
|
60 |
+
"loss": 1.3068,
|
61 |
"step": 40
|
62 |
},
|
63 |
{
|
64 |
"epoch": 4.444444444444445,
|
65 |
+
"eval_loss": 1.3598744869232178,
|
66 |
+
"eval_runtime": 35.0281,
|
67 |
+
"eval_samples_per_second": 1.028,
|
68 |
+
"eval_steps_per_second": 0.143,
|
69 |
"step": 40
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.555555555555555,
|
73 |
+
"grad_norm": 0.022395364940166473,
|
74 |
+
"learning_rate": 8.888888888888889e-05,
|
75 |
+
"loss": 1.2049,
|
76 |
"step": 50
|
77 |
},
|
78 |
{
|
79 |
"epoch": 5.555555555555555,
|
80 |
+
"eval_loss": 1.3168741464614868,
|
81 |
+
"eval_runtime": 34.692,
|
82 |
+
"eval_samples_per_second": 1.038,
|
83 |
+
"eval_steps_per_second": 0.144,
|
84 |
"step": 50
|
85 |
},
|
86 |
{
|
87 |
"epoch": 6.666666666666667,
|
88 |
+
"grad_norm": 0.02603345364332199,
|
89 |
+
"learning_rate": 6.666666666666667e-05,
|
90 |
+
"loss": 1.1086,
|
91 |
"step": 60
|
92 |
},
|
93 |
{
|
94 |
"epoch": 6.666666666666667,
|
95 |
+
"eval_loss": 1.2939578294754028,
|
96 |
+
"eval_runtime": 34.6444,
|
97 |
+
"eval_samples_per_second": 1.039,
|
98 |
+
"eval_steps_per_second": 0.144,
|
99 |
"step": 60
|
100 |
}
|
101 |
],
|
102 |
"logging_steps": 10,
|
103 |
+
"max_steps": 90,
|
104 |
"num_input_tokens_seen": 0,
|
105 |
+
"num_train_epochs": 10,
|
106 |
"save_steps": 10,
|
107 |
"stateful_callbacks": {
|
108 |
"EarlyStoppingCallback": {
|
checkpoint-60/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0053334cab1bfd2838c507b240d05093bc205b1d5fbe6dc54a022fef5dcaa7
|
3 |
size 5112
|
checkpoint-70/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
library_name: peft
|
3 |
base_model: TheBloke/Llama-2-7B-fp16
|
|
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
|
|
1 |
---
|
|
|
2 |
base_model: TheBloke/Llama-2-7B-fp16
|
3 |
+
library_name: peft
|
4 |
---
|
5 |
|
6 |
# Model Card for Model ID
|
checkpoint-70/adapter_config.json
CHANGED
@@ -21,9 +21,9 @@
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
-
"
|
25 |
"o_proj",
|
26 |
-
"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
|
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
"k_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"v_proj"
|
27 |
],
|
28 |
"task_type": "CAUSAL_LM",
|
29 |
"use_dora": false,
|
checkpoint-70/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67143296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c44abb9173442b0bc413ba469da95e7be794812df0c2a2c16e54fc301511e3e
|
3 |
size 67143296
|
checkpoint-70/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 134433530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2df92dd99064eb6aa6542b203055cf8cc892cff2f30a210807b7667c96cedc3a
|
3 |
size 134433530
|
checkpoint-70/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b55d3cbe104822729f4f45e02a6c248fb8a4cb356c229f5c93e65066ff6a397
|
3 |
size 1064
|