End of training

Browse files

Files changed (5) hide show

README.md +20 -20
all_results.json +53 -0
eval_results.json +37 -0
train_results.json +19 -0
trainer_state.json +0 -0

README.md CHANGED Viewed

@@ -18,31 +18,31 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2285
-- < Precision: 0.9131
-- < Recall: 0.9079
-- < F1-score: 0.9105
 - < Support: 7717.0
-- > Precision: 0.9138
-- > Recall: 0.9093
-- > F1-score: 0.9115
 - > Support: 7717.0
-- = Precision: 0.7882
-- = Recall: 0.7975
-- = F1-score: 0.7928
 - = Support: 3244.0
-- - Precision: 0.7313
-- - Recall: 0.7557
-- - F1-score: 0.7433
 - - Support: 1322.0
-- Accuracy: 0.8804
-- Macro Avg Precision: 0.8366
-- Macro Avg Recall: 0.8426
-- Macro Avg F1-score: 0.8395
 - Macro Avg Support: 20000.0
-- Weighted Avg Precision: 0.8811
-- Weighted Avg Recall: 0.8804
-- Weighted Avg F1-score: 0.8807
 - Weighted Avg Support: 20000.0
 ## Model description

 This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1898
+- < Precision: 0.9121
+- < Recall: 0.9051
+- < F1-score: 0.9086
 - < Support: 7717.0
+- > Precision: 0.9113
+- > Recall: 0.9016
+- > F1-score: 0.9065
 - > Support: 7717.0
+- = Precision: 0.7992
+- = Recall: 0.8098
+- = F1-score: 0.8045
 - = Support: 3244.0
+- - Precision: 0.7401
+- - Recall: 0.7950
+- - F1-score: 0.7666
 - - Support: 1322.0
+- Accuracy: 0.8810
+- Macro Avg Precision: 0.8407
+- Macro Avg Recall: 0.8529
+- Macro Avg F1-score: 0.8465
 - Macro Avg Support: 20000.0
+- Weighted Avg Precision: 0.8821
+- Weighted Avg Recall: 0.8810
+- Weighted Avg F1-score: 0.8815
 - Weighted Avg Support: 20000.0
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "before_init_mem_cpu": 3866619904,
+    "before_init_mem_gpu": 512,
+    "epoch": 5.0,
+    "eval_-_f1-score": 0.7665937272064187,
+    "eval_-_precision": 0.7401408450704225,
+    "eval_-_recall": 0.7950075642965204,
+    "eval_-_support": 1322.0,
+    "eval_<_f1-score": 0.9086178861788617,
+    "eval_<_precision": 0.9121180464873335,
+    "eval_<_recall": 0.9051444861993002,
+    "eval_<_support": 7717.0,
+    "eval_=_f1-score": 0.8044709845352932,
+    "eval_=_precision": 0.7992090051718893,
+    "eval_=_recall": 0.80980271270037,
+    "eval_=_support": 3244.0,
+    "eval_>_f1-score": 0.9064616988014591,
+    "eval_>_precision": 0.9113294040602489,
+    "eval_>_recall": 0.9016457172476351,
+    "eval_>_support": 7717.0,
+    "eval_accuracy": 0.88105,
+    "eval_loss": 0.18984687328338623,
+    "eval_macro_avg_f1-score": 0.8465360741805081,
+    "eval_macro_avg_precision": 0.8406993251974735,
+    "eval_macro_avg_recall": 0.8529001201109565,
+    "eval_macro_avg_support": 20000.0,
+    "eval_mem_cpu_alloc_delta": -269582336,
+    "eval_mem_cpu_peaked_delta": 269582336,
+    "eval_mem_gpu_alloc_delta": 0,
+    "eval_mem_gpu_peaked_delta": 1242539008,
+    "eval_runtime": 14.1119,
+    "eval_samples": 20000,
+    "eval_samples_per_second": 1417.242,
+    "eval_steps_per_second": 5.598,
+    "eval_weighted_avg_f1-score": 0.8815054969246255,
+    "eval_weighted_avg_precision": 0.8821322092918199,
+    "eval_weighted_avg_recall": 0.88105,
+    "eval_weighted_avg_support": 20000.0,
+    "init_mem_cpu_alloc_delta": 18591744,
+    "init_mem_cpu_peaked_delta": 0,
+    "init_mem_gpu_alloc_delta": 0,
+    "init_mem_gpu_peaked_delta": 0,
+    "total_flos": 2.2572496552911176e+18,
+    "train_loss": 0.3141621667137878,
+    "train_mem_cpu_alloc_delta": 827277312,
+    "train_mem_cpu_peaked_delta": 311078912,
+    "train_mem_gpu_alloc_delta": 757461504,
+    "train_mem_gpu_peaked_delta": 19341542400,
+    "train_runtime": 24803.7,
+    "train_samples": 1386134,
+    "train_samples_per_second": 1676.525,
+    "train_steps_per_second": 3.274
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+    "epoch": 5.0,
+    "eval_-_f1-score": 0.7665937272064187,
+    "eval_-_precision": 0.7401408450704225,
+    "eval_-_recall": 0.7950075642965204,
+    "eval_-_support": 1322.0,
+    "eval_<_f1-score": 0.9086178861788617,
+    "eval_<_precision": 0.9121180464873335,
+    "eval_<_recall": 0.9051444861993002,
+    "eval_<_support": 7717.0,
+    "eval_=_f1-score": 0.8044709845352932,
+    "eval_=_precision": 0.7992090051718893,
+    "eval_=_recall": 0.80980271270037,
+    "eval_=_support": 3244.0,
+    "eval_>_f1-score": 0.9064616988014591,
+    "eval_>_precision": 0.9113294040602489,
+    "eval_>_recall": 0.9016457172476351,
+    "eval_>_support": 7717.0,
+    "eval_accuracy": 0.88105,
+    "eval_loss": 0.18984687328338623,
+    "eval_macro_avg_f1-score": 0.8465360741805081,
+    "eval_macro_avg_precision": 0.8406993251974735,
+    "eval_macro_avg_recall": 0.8529001201109565,
+    "eval_macro_avg_support": 20000.0,
+    "eval_mem_cpu_alloc_delta": -269582336,
+    "eval_mem_cpu_peaked_delta": 269582336,
+    "eval_mem_gpu_alloc_delta": 0,
+    "eval_mem_gpu_peaked_delta": 1242539008,
+    "eval_runtime": 14.1119,
+    "eval_samples": 20000,
+    "eval_samples_per_second": 1417.242,
+    "eval_steps_per_second": 5.598,
+    "eval_weighted_avg_f1-score": 0.8815054969246255,
+    "eval_weighted_avg_precision": 0.8821322092918199,
+    "eval_weighted_avg_recall": 0.88105,
+    "eval_weighted_avg_support": 20000.0
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+    "before_init_mem_cpu": 3866619904,
+    "before_init_mem_gpu": 512,
+    "epoch": 5.0,
+    "init_mem_cpu_alloc_delta": 18591744,
+    "init_mem_cpu_peaked_delta": 0,
+    "init_mem_gpu_alloc_delta": 0,
+    "init_mem_gpu_peaked_delta": 0,
+    "total_flos": 2.2572496552911176e+18,
+    "train_loss": 0.3141621667137878,
+    "train_mem_cpu_alloc_delta": 827277312,
+    "train_mem_cpu_peaked_delta": 311078912,
+    "train_mem_gpu_alloc_delta": 757461504,
+    "train_mem_gpu_peaked_delta": 19341542400,
+    "train_runtime": 24803.7,
+    "train_samples": 1386134,
+    "train_samples_per_second": 1676.525,
+    "train_steps_per_second": 3.274
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff