Model save
- README.md +41 -41
- adapter_model.safetensors +1 -1
- all_results.json +6 -6
- train_results.json +6 -6
README.md
CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.4651
 
 ## Model description
 
@@ -52,46 +52,46 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 0.0811 | 0.1892 | 100 | 0.2884 |
+| 0.0461 | 0.3784 | 200 | 0.1991 |
+| 0.0421 | 0.5676 | 300 | 0.2228 |
+| 0.0307 | 0.7569 | 400 | 0.1926 |
+| 0.0729 | 0.9461 | 500 | 0.1457 |
+| 0.0344 | 1.1353 | 600 | 0.1295 |
+| 0.0257 | 1.3245 | 700 | 0.2353 |
+| 0.0379 | 1.5137 | 800 | 0.2110 |
+| 0.0247 | 1.7029 | 900 | 0.1904 |
+| 0.0018 | 1.8921 | 1000 | 0.2594 |
+| 0.0115 | 2.0814 | 1100 | 0.2567 |
+| 0.0059 | 2.2706 | 1200 | 0.2599 |
+| 0.0225 | 2.4598 | 1300 | 0.2947 |
+| 0.0149 | 2.6490 | 1400 | 0.2559 |
+| 0.0293 | 2.8382 | 1500 | 0.2606 |
+| 0.0026 | 3.0274 | 1600 | 0.2469 |
+| 0.0145 | 3.2167 | 1700 | 0.2146 |
+| 0.0004 | 3.4059 | 1800 | 0.3081 |
+| 0.0117 | 3.5951 | 1900 | 0.3059 |
+| 0.0207 | 3.7843 | 2000 | 0.3001 |
+| 0.0061 | 3.9735 | 2100 | 0.3827 |
+| 0.0072 | 4.1627 | 2200 | 0.3541 |
+| 0.0348 | 4.3519 | 2300 | 0.3904 |
+| 0.0019 | 4.5412 | 2400 | 0.3549 |
+| 0.0031 | 4.7304 | 2500 | 0.3791 |
+| 0.0009 | 4.9196 | 2600 | 0.4193 |
+| 0.0011 | 5.1088 | 2700 | 0.4539 |
+| 0.0251 | 5.2980 | 2800 | 0.4403 |
+| 0.0008 | 5.4872 | 2900 | 0.4527 |
+| 0.0085 | 5.6764 | 3000 | 0.4156 |
+| 0.0013 | 5.8657 | 3100 | 0.4183 |
+| 0.0007 | 6.0549 | 3200 | 0.4241 |
+| 0.0025 | 6.2441 | 3300 | 0.4420 |
+| 0.0029 | 6.4333 | 3400 | 0.4514 |
+| 0.0041 | 6.6225 | 3500 | 0.4619 |
+| 0.0009 | 6.8117 | 3600 | 0.4452 |
+| 0.0001 | 7.0009 | 3700 | 0.4656 |
+| 0.0007 | 7.1902 | 3800 | 0.4603 |
+| 0.0014 | 7.3794 | 3900 | 0.4651 |
+| 0.0175 | 7.5686 | 4000 | 0.4651 |
 
 
 ### Framework versions
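The adapter_model.safetensors file changed in this commit suggests the fine-tune is stored as a PEFT adapter rather than as full model weights. As a minimal usage sketch (not taken from the model card), this is roughly how such an adapter is loaded on top of the base model; the adapter path is a placeholder, since the commit does not name the repository id:

```python
# Minimal sketch of loading a PEFT adapter on top of the base model.
# "path/or/repo-of-this-adapter" is a placeholder; the commit does not state
# the adapter's repository id, only that adapter_model.safetensors changed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "meta-llama/Meta-Llama-3-8B-Instruct"
ADAPTER = "path/or/repo-of-this-adapter"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(BASE)
base = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, ADAPTER)

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```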
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f9415f3a23a12844000b44f2bf763a6db9aef6a37cce0e9450206eaa07c6c811
 size 2115012328
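The adapter_model.safetensors entry above is a Git LFS pointer rather than the weights themselves: its `oid sha256:` field is the SHA-256 digest of the real 2,115,012,328-byte file. A minimal sketch, assuming the safetensors file has already been downloaded locally, for verifying a download against the updated pointer:

```python
# Verify a locally downloaded adapter_model.safetensors against the LFS pointer:
# the pointer's `oid sha256:` value is the SHA-256 of the full file contents.
import hashlib

EXPECTED_SHA256 = "f9415f3a23a12844000b44f2bf763a6db9aef6a37cce0e9450206eaa07c6c811"
EXPECTED_SIZE = 2115012328  # bytes, from the pointer's `size` field

h = hashlib.sha256()
size = 0
with open("adapter_model.safetensors", "rb") as f:  # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"unexpected size: {size}"
assert h.hexdigest() == EXPECTED_SHA256, "digest does not match the LFS pointer"
print("adapter_model.safetensors matches the LFS pointer")
```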
all_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch":
-    "total_flos": 3.
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second":
-    "train_steps_per_second": 0.
+    "epoch": 7.750236518448439,
+    "total_flos": 3.618461896742535e+18,
+    "train_loss": 0.06011972692103562,
+    "train_runtime": 39228.4809,
+    "train_samples_per_second": 6.682,
+    "train_steps_per_second": 0.104
 }
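train_results.json below repeats the same aggregates, and a few figures can be derived from them: samples per second divided by steps per second implies an effective batch size of roughly 64, and runtime times steps per second gives a little over 4,000 optimizer steps, consistent with the 4,000+ steps shown in the training table above. A small sketch of that arithmetic (the file path and the rounding of the reported values are assumptions):

```python
# Rough sanity check on the aggregates reported in all_results.json.
# The per-second values are rounded, so the derived numbers are approximate.
import json

with open("all_results.json") as f:  # assumed path: repository root
    r = json.load(f)

total_steps = r["train_runtime"] * r["train_steps_per_second"]        # ~4,080
total_samples = r["train_runtime"] * r["train_samples_per_second"]    # ~262,000
effective_batch = r["train_samples_per_second"] / r["train_steps_per_second"]  # ~64

print(f"steps ~{total_steps:.0f}, samples ~{total_samples:.0f}, "
      f"effective batch size ~{effective_batch:.1f}")
```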
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch":
-    "total_flos": 3.
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second":
-    "train_steps_per_second": 0.
+    "epoch": 7.750236518448439,
+    "total_flos": 3.618461896742535e+18,
+    "train_loss": 0.06011972692103562,
+    "train_runtime": 39228.4809,
+    "train_samples_per_second": 6.682,
+    "train_steps_per_second": 0.104
 }