hugosousa committed on
Commit
397c1eb
·
verified ·
1 Parent(s): b5571c0

End of training

Browse files
Files changed (5) hide show
  1. README.md +20 -20
  2. all_results.json +53 -0
  3. eval_results.json +37 -0
  4. train_results.json +19 -0
  5. trainer_state.json +0 -0
README.md CHANGED
@@ -18,31 +18,31 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2285
22
- - < Precision: 0.9131
23
- - < Recall: 0.9079
24
- - < F1-score: 0.9105
25
  - < Support: 7717.0
26
- - > Precision: 0.9138
27
- - > Recall: 0.9093
28
- - > F1-score: 0.9115
29
  - > Support: 7717.0
30
- - = Precision: 0.7882
31
- - = Recall: 0.7975
32
- - = F1-score: 0.7928
33
  - = Support: 3244.0
34
- - - Precision: 0.7313
35
- - - Recall: 0.7557
36
- - - F1-score: 0.7433
37
  - - Support: 1322.0
38
- - Accuracy: 0.8804
39
- - Macro Avg Precision: 0.8366
40
- - Macro Avg Recall: 0.8426
41
- - Macro Avg F1-score: 0.8395
42
  - Macro Avg Support: 20000.0
43
- - Weighted Avg Precision: 0.8811
44
- - Weighted Avg Recall: 0.8804
45
- - Weighted Avg F1-score: 0.8807
46
  - Weighted Avg Support: 20000.0
47
 
48
  ## Model description
 
18
 
19
  This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.1898
22
+ - < Precision: 0.9121
23
+ - < Recall: 0.9051
24
+ - < F1-score: 0.9086
25
  - < Support: 7717.0
26
+ - > Precision: 0.9113
27
+ - > Recall: 0.9016
28
+ - > F1-score: 0.9065
29
  - > Support: 7717.0
30
+ - = Precision: 0.7992
31
+ - = Recall: 0.8098
32
+ - = F1-score: 0.8045
33
  - = Support: 3244.0
34
+ - - Precision: 0.7401
35
+ - - Recall: 0.7950
36
+ - - F1-score: 0.7666
37
  - - Support: 1322.0
38
+ - Accuracy: 0.8810
39
+ - Macro Avg Precision: 0.8407
40
+ - Macro Avg Recall: 0.8529
41
+ - Macro Avg F1-score: 0.8465
42
  - Macro Avg Support: 20000.0
43
+ - Weighted Avg Precision: 0.8821
44
+ - Weighted Avg Recall: 0.8810
45
+ - Weighted Avg F1-score: 0.8815
46
  - Weighted Avg Support: 20000.0
47
 
48
  ## Model description
all_results.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "before_init_mem_cpu": 3866619904,
3
+ "before_init_mem_gpu": 512,
4
+ "epoch": 5.0,
5
+ "eval_-_f1-score": 0.7665937272064187,
6
+ "eval_-_precision": 0.7401408450704225,
7
+ "eval_-_recall": 0.7950075642965204,
8
+ "eval_-_support": 1322.0,
9
+ "eval_<_f1-score": 0.9086178861788617,
10
+ "eval_<_precision": 0.9121180464873335,
11
+ "eval_<_recall": 0.9051444861993002,
12
+ "eval_<_support": 7717.0,
13
+ "eval_=_f1-score": 0.8044709845352932,
14
+ "eval_=_precision": 0.7992090051718893,
15
+ "eval_=_recall": 0.80980271270037,
16
+ "eval_=_support": 3244.0,
17
+ "eval_>_f1-score": 0.9064616988014591,
18
+ "eval_>_precision": 0.9113294040602489,
19
+ "eval_>_recall": 0.9016457172476351,
20
+ "eval_>_support": 7717.0,
21
+ "eval_accuracy": 0.88105,
22
+ "eval_loss": 0.18984687328338623,
23
+ "eval_macro_avg_f1-score": 0.8465360741805081,
24
+ "eval_macro_avg_precision": 0.8406993251974735,
25
+ "eval_macro_avg_recall": 0.8529001201109565,
26
+ "eval_macro_avg_support": 20000.0,
27
+ "eval_mem_cpu_alloc_delta": -269582336,
28
+ "eval_mem_cpu_peaked_delta": 269582336,
29
+ "eval_mem_gpu_alloc_delta": 0,
30
+ "eval_mem_gpu_peaked_delta": 1242539008,
31
+ "eval_runtime": 14.1119,
32
+ "eval_samples": 20000,
33
+ "eval_samples_per_second": 1417.242,
34
+ "eval_steps_per_second": 5.598,
35
+ "eval_weighted_avg_f1-score": 0.8815054969246255,
36
+ "eval_weighted_avg_precision": 0.8821322092918199,
37
+ "eval_weighted_avg_recall": 0.88105,
38
+ "eval_weighted_avg_support": 20000.0,
39
+ "init_mem_cpu_alloc_delta": 18591744,
40
+ "init_mem_cpu_peaked_delta": 0,
41
+ "init_mem_gpu_alloc_delta": 0,
42
+ "init_mem_gpu_peaked_delta": 0,
43
+ "total_flos": 2.2572496552911176e+18,
44
+ "train_loss": 0.3141621667137878,
45
+ "train_mem_cpu_alloc_delta": 827277312,
46
+ "train_mem_cpu_peaked_delta": 311078912,
47
+ "train_mem_gpu_alloc_delta": 757461504,
48
+ "train_mem_gpu_peaked_delta": 19341542400,
49
+ "train_runtime": 24803.7,
50
+ "train_samples": 1386134,
51
+ "train_samples_per_second": 1676.525,
52
+ "train_steps_per_second": 3.274
53
+ }
eval_results.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_-_f1-score": 0.7665937272064187,
4
+ "eval_-_precision": 0.7401408450704225,
5
+ "eval_-_recall": 0.7950075642965204,
6
+ "eval_-_support": 1322.0,
7
+ "eval_<_f1-score": 0.9086178861788617,
8
+ "eval_<_precision": 0.9121180464873335,
9
+ "eval_<_recall": 0.9051444861993002,
10
+ "eval_<_support": 7717.0,
11
+ "eval_=_f1-score": 0.8044709845352932,
12
+ "eval_=_precision": 0.7992090051718893,
13
+ "eval_=_recall": 0.80980271270037,
14
+ "eval_=_support": 3244.0,
15
+ "eval_>_f1-score": 0.9064616988014591,
16
+ "eval_>_precision": 0.9113294040602489,
17
+ "eval_>_recall": 0.9016457172476351,
18
+ "eval_>_support": 7717.0,
19
+ "eval_accuracy": 0.88105,
20
+ "eval_loss": 0.18984687328338623,
21
+ "eval_macro_avg_f1-score": 0.8465360741805081,
22
+ "eval_macro_avg_precision": 0.8406993251974735,
23
+ "eval_macro_avg_recall": 0.8529001201109565,
24
+ "eval_macro_avg_support": 20000.0,
25
+ "eval_mem_cpu_alloc_delta": -269582336,
26
+ "eval_mem_cpu_peaked_delta": 269582336,
27
+ "eval_mem_gpu_alloc_delta": 0,
28
+ "eval_mem_gpu_peaked_delta": 1242539008,
29
+ "eval_runtime": 14.1119,
30
+ "eval_samples": 20000,
31
+ "eval_samples_per_second": 1417.242,
32
+ "eval_steps_per_second": 5.598,
33
+ "eval_weighted_avg_f1-score": 0.8815054969246255,
34
+ "eval_weighted_avg_precision": 0.8821322092918199,
35
+ "eval_weighted_avg_recall": 0.88105,
36
+ "eval_weighted_avg_support": 20000.0
37
+ }
train_results.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "before_init_mem_cpu": 3866619904,
3
+ "before_init_mem_gpu": 512,
4
+ "epoch": 5.0,
5
+ "init_mem_cpu_alloc_delta": 18591744,
6
+ "init_mem_cpu_peaked_delta": 0,
7
+ "init_mem_gpu_alloc_delta": 0,
8
+ "init_mem_gpu_peaked_delta": 0,
9
+ "total_flos": 2.2572496552911176e+18,
10
+ "train_loss": 0.3141621667137878,
11
+ "train_mem_cpu_alloc_delta": 827277312,
12
+ "train_mem_cpu_peaked_delta": 311078912,
13
+ "train_mem_gpu_alloc_delta": 757461504,
14
+ "train_mem_gpu_peaked_delta": 19341542400,
15
+ "train_runtime": 24803.7,
16
+ "train_samples": 1386134,
17
+ "train_samples_per_second": 1676.525,
18
+ "train_steps_per_second": 3.274
19
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff