End of training

Browse files

Files changed (5) hide show

README.md +19 -19
all_results.json +53 -0
eval_results.json +37 -0
train_results.json +19 -0
trainer_state.json +0 -0

README.md CHANGED Viewed

@@ -18,31 +18,31 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1038
-- < Precision: 0.9661
-- < Recall: 0.9714
-- < F1-score: 0.9687
 - < Support: 4865.0
-- > Precision: 0.9688
-- > Recall: 0.9700
-- > F1-score: 0.9694
 - > Support: 4865.0
-- = Precision: 0.8884
-- = Recall: 0.8024
-- = F1-score: 0.8432
 - = Support: 248.0
-- - Precision: 0.4615
 - - Recall: 0.2727
-- - F1-score: 0.3429
 - - Support: 22.0
-- Accuracy: 0.965
-- Macro Avg Precision: 0.8212
-- Macro Avg Recall: 0.7541
-- Macro Avg F1-score: 0.7811
 - Macro Avg Support: 10000.0
-- Weighted Avg Precision: 0.9644
-- Weighted Avg Recall: 0.965
-- Weighted Avg F1-score: 0.9646
 - Weighted Avg Support: 10000.0
 ## Model description

 This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1047
+- < Precision: 0.9674
+- < Recall: 0.9708
+- < F1-score: 0.9691
 - < Support: 4865.0
+- > Precision: 0.9686
+- > Recall: 0.9706
+- > F1-score: 0.9696
 - > Support: 4865.0
+- = Precision: 0.8734
+- = Recall: 0.8065
+- = F1-score: 0.8386
 - = Support: 248.0
+- - Precision: 0.4286
 - - Recall: 0.2727
+- - F1-score: 0.3333
 - - Support: 22.0
+- Accuracy: 0.9651
+- Macro Avg Precision: 0.8095
+- Macro Avg Recall: 0.7551
+- Macro Avg F1-score: 0.7777
 - Macro Avg Support: 10000.0
+- Weighted Avg Precision: 0.9645
+- Weighted Avg Recall: 0.9651
+- Weighted Avg F1-score: 0.9647
 - Weighted Avg Support: 10000.0
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "before_init_mem_cpu": 3040526336,
+    "before_init_mem_gpu": 512,
+    "epoch": 18.0,
+    "eval_-_f1-score": 0.3333333333333333,
+    "eval_-_precision": 0.42857142857142855,
+    "eval_-_recall": 0.2727272727272727,
+    "eval_-_support": 22.0,
+    "eval_<_f1-score": 0.9691187031907254,
+    "eval_<_precision": 0.9674313805817288,
+    "eval_<_recall": 0.9708119218910586,
+    "eval_<_support": 4865.0,
+    "eval_=_f1-score": 0.8385744234800838,
+    "eval_=_precision": 0.8733624454148472,
+    "eval_=_recall": 0.8064516129032258,
+    "eval_=_support": 248.0,
+    "eval_>_f1-score": 0.9696098562628337,
+    "eval_>_precision": 0.9686153846153847,
+    "eval_>_recall": 0.9706063720452209,
+    "eval_>_support": 4865.0,
+    "eval_accuracy": 0.9651,
+    "eval_loss": 0.10465546697378159,
+    "eval_macro_avg_f1-score": 0.777659079066744,
+    "eval_macro_avg_precision": 0.8094951597958472,
+    "eval_macro_avg_recall": 0.7551492948916945,
+    "eval_macro_avg_support": 10000.0,
+    "eval_mem_cpu_alloc_delta": -175185920,
+    "eval_mem_cpu_peaked_delta": 175185920,
+    "eval_mem_gpu_alloc_delta": 0,
+    "eval_mem_gpu_peaked_delta": 1242359296,
+    "eval_runtime": 12.3635,
+    "eval_samples": 10000,
+    "eval_samples_per_second": 808.83,
+    "eval_steps_per_second": 3.235,
+    "eval_weighted_avg_f1-score": 0.964721423209796,
+    "eval_weighted_avg_precision": 0.964488997057541,
+    "eval_weighted_avg_recall": 0.9651,
+    "eval_weighted_avg_support": 10000.0,
+    "init_mem_cpu_alloc_delta": 18427904,
+    "init_mem_cpu_peaked_delta": 0,
+    "init_mem_gpu_alloc_delta": 0,
+    "init_mem_gpu_peaked_delta": 0,
+    "total_flos": 2.949719748897669e+18,
+    "train_loss": 0.22952656600628965,
+    "train_mem_cpu_alloc_delta": 826355712,
+    "train_mem_cpu_peaked_delta": 403324928,
+    "train_mem_gpu_alloc_delta": 690193408,
+    "train_mem_gpu_peaked_delta": 19408810496,
+    "train_runtime": 33563.6304,
+    "train_samples": 502272,
+    "train_samples_per_second": 448.943,
+    "train_steps_per_second": 0.877
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+    "epoch": 18.0,
+    "eval_-_f1-score": 0.3333333333333333,
+    "eval_-_precision": 0.42857142857142855,
+    "eval_-_recall": 0.2727272727272727,
+    "eval_-_support": 22.0,
+    "eval_<_f1-score": 0.9691187031907254,
+    "eval_<_precision": 0.9674313805817288,
+    "eval_<_recall": 0.9708119218910586,
+    "eval_<_support": 4865.0,
+    "eval_=_f1-score": 0.8385744234800838,
+    "eval_=_precision": 0.8733624454148472,
+    "eval_=_recall": 0.8064516129032258,
+    "eval_=_support": 248.0,
+    "eval_>_f1-score": 0.9696098562628337,
+    "eval_>_precision": 0.9686153846153847,
+    "eval_>_recall": 0.9706063720452209,
+    "eval_>_support": 4865.0,
+    "eval_accuracy": 0.9651,
+    "eval_loss": 0.10465546697378159,
+    "eval_macro_avg_f1-score": 0.777659079066744,
+    "eval_macro_avg_precision": 0.8094951597958472,
+    "eval_macro_avg_recall": 0.7551492948916945,
+    "eval_macro_avg_support": 10000.0,
+    "eval_mem_cpu_alloc_delta": -175185920,
+    "eval_mem_cpu_peaked_delta": 175185920,
+    "eval_mem_gpu_alloc_delta": 0,
+    "eval_mem_gpu_peaked_delta": 1242359296,
+    "eval_runtime": 12.3635,
+    "eval_samples": 10000,
+    "eval_samples_per_second": 808.83,
+    "eval_steps_per_second": 3.235,
+    "eval_weighted_avg_f1-score": 0.964721423209796,
+    "eval_weighted_avg_precision": 0.964488997057541,
+    "eval_weighted_avg_recall": 0.9651,
+    "eval_weighted_avg_support": 10000.0
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+    "before_init_mem_cpu": 3040526336,
+    "before_init_mem_gpu": 512,
+    "epoch": 18.0,
+    "init_mem_cpu_alloc_delta": 18427904,
+    "init_mem_cpu_peaked_delta": 0,
+    "init_mem_gpu_alloc_delta": 0,
+    "init_mem_gpu_peaked_delta": 0,
+    "total_flos": 2.949719748897669e+18,
+    "train_loss": 0.22952656600628965,
+    "train_mem_cpu_alloc_delta": 826355712,
+    "train_mem_cpu_peaked_delta": 403324928,
+    "train_mem_gpu_alloc_delta": 690193408,
+    "train_mem_gpu_peaked_delta": 19408810496,
+    "train_runtime": 33563.6304,
+    "train_samples": 502272,
+    "train_samples_per_second": 448.943,
+    "train_steps_per_second": 0.877
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff