End of training
Browse files
- README.md +20 -20
- all_results.json +53 -0
- eval_results.json +37 -0
- train_results.json +19 -0
- trainer_state.json +0 -0
README.md
CHANGED
@@ -18,31 +18,31 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on the None dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss: 0.
|
22 |
-
- < Precision: 0.
|
23 |
-
- < Recall: 0.
|
24 |
-
- < F1-score: 0.
|
25 |
- < Support: 7717.0
|
26 |
-
- > Precision: 0.
|
27 |
-
- > Recall: 0.
|
28 |
-
- > F1-score: 0.
|
29 |
- > Support: 7717.0
|
30 |
-
- = Precision: 0.
|
31 |
-
- = Recall: 0.
|
32 |
-
- = F1-score: 0.
|
33 |
- = Support: 3244.0
|
34 |
-
- - Precision: 0.
|
35 |
-
- - Recall: 0.
|
36 |
-
- - F1-score: 0.
|
37 |
- - Support: 1322.0
|
38 |
-
- Accuracy: 0.
|
39 |
-
- Macro Avg Precision: 0.
|
40 |
-
- Macro Avg Recall: 0.
|
41 |
-
- Macro Avg F1-score: 0.
|
42 |
- Macro Avg Support: 20000.0
|
43 |
-
- Weighted Avg Precision: 0.
|
44 |
-
- Weighted Avg Recall: 0.
|
45 |
-
- Weighted Avg F1-score: 0.
|
46 |
- Weighted Avg Support: 20000.0
|
47 |
|
48 |
## Model description
|
|
|
18 |
|
19 |
This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on the None dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.1898
|
22 |
+
- < Precision: 0.9121
|
23 |
+
- < Recall: 0.9051
|
24 |
+
- < F1-score: 0.9086
|
25 |
- < Support: 7717.0
|
26 |
+
- > Precision: 0.9113
|
27 |
+
- > Recall: 0.9016
|
28 |
+
- > F1-score: 0.9065
|
29 |
- > Support: 7717.0
|
30 |
+
- = Precision: 0.7992
|
31 |
+
- = Recall: 0.8098
|
32 |
+
- = F1-score: 0.8045
|
33 |
- = Support: 3244.0
|
34 |
+
- - Precision: 0.7401
|
35 |
+
- - Recall: 0.7950
|
36 |
+
- - F1-score: 0.7666
|
37 |
- - Support: 1322.0
|
38 |
+
- Accuracy: 0.8810
|
39 |
+
- Macro Avg Precision: 0.8407
|
40 |
+
- Macro Avg Recall: 0.8529
|
41 |
+
- Macro Avg F1-score: 0.8465
|
42 |
- Macro Avg Support: 20000.0
|
43 |
+
- Weighted Avg Precision: 0.8821
|
44 |
+
- Weighted Avg Recall: 0.8810
|
45 |
+
- Weighted Avg F1-score: 0.8815
|
46 |
- Weighted Avg Support: 20000.0
|
47 |
|
48 |
## Model description
|
all_results.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"before_init_mem_cpu": 3866619904,
|
3 |
+
"before_init_mem_gpu": 512,
|
4 |
+
"epoch": 5.0,
|
5 |
+
"eval_-_f1-score": 0.7665937272064187,
|
6 |
+
"eval_-_precision": 0.7401408450704225,
|
7 |
+
"eval_-_recall": 0.7950075642965204,
|
8 |
+
"eval_-_support": 1322.0,
|
9 |
+
"eval_<_f1-score": 0.9086178861788617,
|
10 |
+
"eval_<_precision": 0.9121180464873335,
|
11 |
+
"eval_<_recall": 0.9051444861993002,
|
12 |
+
"eval_<_support": 7717.0,
|
13 |
+
"eval_=_f1-score": 0.8044709845352932,
|
14 |
+
"eval_=_precision": 0.7992090051718893,
|
15 |
+
"eval_=_recall": 0.80980271270037,
|
16 |
+
"eval_=_support": 3244.0,
|
17 |
+
"eval_>_f1-score": 0.9064616988014591,
|
18 |
+
"eval_>_precision": 0.9113294040602489,
|
19 |
+
"eval_>_recall": 0.9016457172476351,
|
20 |
+
"eval_>_support": 7717.0,
|
21 |
+
"eval_accuracy": 0.88105,
|
22 |
+
"eval_loss": 0.18984687328338623,
|
23 |
+
"eval_macro_avg_f1-score": 0.8465360741805081,
|
24 |
+
"eval_macro_avg_precision": 0.8406993251974735,
|
25 |
+
"eval_macro_avg_recall": 0.8529001201109565,
|
26 |
+
"eval_macro_avg_support": 20000.0,
|
27 |
+
"eval_mem_cpu_alloc_delta": -269582336,
|
28 |
+
"eval_mem_cpu_peaked_delta": 269582336,
|
29 |
+
"eval_mem_gpu_alloc_delta": 0,
|
30 |
+
"eval_mem_gpu_peaked_delta": 1242539008,
|
31 |
+
"eval_runtime": 14.1119,
|
32 |
+
"eval_samples": 20000,
|
33 |
+
"eval_samples_per_second": 1417.242,
|
34 |
+
"eval_steps_per_second": 5.598,
|
35 |
+
"eval_weighted_avg_f1-score": 0.8815054969246255,
|
36 |
+
"eval_weighted_avg_precision": 0.8821322092918199,
|
37 |
+
"eval_weighted_avg_recall": 0.88105,
|
38 |
+
"eval_weighted_avg_support": 20000.0,
|
39 |
+
"init_mem_cpu_alloc_delta": 18591744,
|
40 |
+
"init_mem_cpu_peaked_delta": 0,
|
41 |
+
"init_mem_gpu_alloc_delta": 0,
|
42 |
+
"init_mem_gpu_peaked_delta": 0,
|
43 |
+
"total_flos": 2.2572496552911176e+18,
|
44 |
+
"train_loss": 0.3141621667137878,
|
45 |
+
"train_mem_cpu_alloc_delta": 827277312,
|
46 |
+
"train_mem_cpu_peaked_delta": 311078912,
|
47 |
+
"train_mem_gpu_alloc_delta": 757461504,
|
48 |
+
"train_mem_gpu_peaked_delta": 19341542400,
|
49 |
+
"train_runtime": 24803.7,
|
50 |
+
"train_samples": 1386134,
|
51 |
+
"train_samples_per_second": 1676.525,
|
52 |
+
"train_steps_per_second": 3.274
|
53 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 5.0,
|
3 |
+
"eval_-_f1-score": 0.7665937272064187,
|
4 |
+
"eval_-_precision": 0.7401408450704225,
|
5 |
+
"eval_-_recall": 0.7950075642965204,
|
6 |
+
"eval_-_support": 1322.0,
|
7 |
+
"eval_<_f1-score": 0.9086178861788617,
|
8 |
+
"eval_<_precision": 0.9121180464873335,
|
9 |
+
"eval_<_recall": 0.9051444861993002,
|
10 |
+
"eval_<_support": 7717.0,
|
11 |
+
"eval_=_f1-score": 0.8044709845352932,
|
12 |
+
"eval_=_precision": 0.7992090051718893,
|
13 |
+
"eval_=_recall": 0.80980271270037,
|
14 |
+
"eval_=_support": 3244.0,
|
15 |
+
"eval_>_f1-score": 0.9064616988014591,
|
16 |
+
"eval_>_precision": 0.9113294040602489,
|
17 |
+
"eval_>_recall": 0.9016457172476351,
|
18 |
+
"eval_>_support": 7717.0,
|
19 |
+
"eval_accuracy": 0.88105,
|
20 |
+
"eval_loss": 0.18984687328338623,
|
21 |
+
"eval_macro_avg_f1-score": 0.8465360741805081,
|
22 |
+
"eval_macro_avg_precision": 0.8406993251974735,
|
23 |
+
"eval_macro_avg_recall": 0.8529001201109565,
|
24 |
+
"eval_macro_avg_support": 20000.0,
|
25 |
+
"eval_mem_cpu_alloc_delta": -269582336,
|
26 |
+
"eval_mem_cpu_peaked_delta": 269582336,
|
27 |
+
"eval_mem_gpu_alloc_delta": 0,
|
28 |
+
"eval_mem_gpu_peaked_delta": 1242539008,
|
29 |
+
"eval_runtime": 14.1119,
|
30 |
+
"eval_samples": 20000,
|
31 |
+
"eval_samples_per_second": 1417.242,
|
32 |
+
"eval_steps_per_second": 5.598,
|
33 |
+
"eval_weighted_avg_f1-score": 0.8815054969246255,
|
34 |
+
"eval_weighted_avg_precision": 0.8821322092918199,
|
35 |
+
"eval_weighted_avg_recall": 0.88105,
|
36 |
+
"eval_weighted_avg_support": 20000.0
|
37 |
+
}
|
train_results.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"before_init_mem_cpu": 3866619904,
|
3 |
+
"before_init_mem_gpu": 512,
|
4 |
+
"epoch": 5.0,
|
5 |
+
"init_mem_cpu_alloc_delta": 18591744,
|
6 |
+
"init_mem_cpu_peaked_delta": 0,
|
7 |
+
"init_mem_gpu_alloc_delta": 0,
|
8 |
+
"init_mem_gpu_peaked_delta": 0,
|
9 |
+
"total_flos": 2.2572496552911176e+18,
|
10 |
+
"train_loss": 0.3141621667137878,
|
11 |
+
"train_mem_cpu_alloc_delta": 827277312,
|
12 |
+
"train_mem_cpu_peaked_delta": 311078912,
|
13 |
+
"train_mem_gpu_alloc_delta": 757461504,
|
14 |
+
"train_mem_gpu_peaked_delta": 19341542400,
|
15 |
+
"train_runtime": 24803.7,
|
16 |
+
"train_samples": 1386134,
|
17 |
+
"train_samples_per_second": 1676.525,
|
18 |
+
"train_steps_per_second": 3.274
|
19 |
+
}
|
trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|