jnmrr committed on
Commit
aca0a54
·
verified ·
1 Parent(s): eec1cd1

Upload model with metrics - 2024-12-12 01:33

Browse files
README.md CHANGED
@@ -25,6 +25,10 @@ This model is trained for document classification using vision transformers (DiT
25
  * Learning Rate: 0.002
26
  * Number of Epochs: 1
27
  * Mixed Precision: BF16
 
 
 
 
28
 
29
  ## Usage
30
 
@@ -44,3 +48,10 @@ inputs = processor(image, return_tensors="pt")
44
  outputs = model(**inputs)
45
  predicted_label = outputs.logits.argmax(-1).item()
46
  ```
 
 
 
 
 
 
 
 
25
  * Learning Rate: 0.002
26
  * Number of Epochs: 1
27
  * Mixed Precision: BF16
28
+ * Gradient Accumulation Steps: 2
29
+ * Weight Decay: 0.01
30
+ * Learning Rate Schedule: cosine_with_restarts
31
+ * Warmup Ratio: 0.1
32
 
33
  ## Usage
34
 
 
48
  outputs = model(**inputs)
49
  predicted_label = outputs.logits.argmax(-1).item()
50
  ```
51
+
52
+ ## Model Performance
53
+
54
+ The model was evaluated on a held-out test set. The metrics above show the model's performance on both training and evaluation data. Key metrics to note:
55
+ - Accuracy: Overall classification accuracy
56
+ - F1 Score: Weighted average F1-score across all classes
57
+ - Precision and Recall: Weighted averages across all classes
checkpoint-35/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842438b2a71dff9bd8051dc5cd455a2ffda7d106f391a8ddd42424b10a27ec96
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40862837ac36823a66434d0e8fb1b8dcebd62b09379dec3f443c3d4180c26d6f
3
  size 1213526036
checkpoint-35/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13999451001921492,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.5426356589147286,
5
  "eval_steps": 35,
@@ -10,34 +10,34 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 12.836527824401855,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.7815,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 6.243555545806885,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 4.9108,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13999451001921492,
28
- "eval_loss": 5.21905517578125,
29
- "eval_macro_f1": 0.04648934733573823,
30
- "eval_macro_precision": 0.03716366453858833,
31
- "eval_macro_recall": 0.09867228278248098,
32
- "eval_micro_f1": 0.13999451001921492,
33
- "eval_micro_precision": 0.13999451001921492,
34
- "eval_micro_recall": 0.13999451001921492,
35
- "eval_runtime": 8.2153,
36
- "eval_samples_per_second": 443.44,
37
- "eval_steps_per_second": 3.53,
38
- "eval_weighted_f1": 0.0724726180615966,
39
- "eval_weighted_precision": 0.06178867753876749,
40
- "eval_weighted_recall": 0.13999451001921492,
41
  "step": 35
42
  }
43
  ],
 
1
  {
2
+ "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.5426356589147286,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13203403788086743,
28
+ "eval_loss": 5.00415563583374,
29
+ "eval_macro_f1": 0.03707599079404095,
30
+ "eval_macro_precision": 0.05261629127568818,
31
+ "eval_macro_recall": 0.06397651980374536,
32
+ "eval_micro_f1": 0.13203403788086743,
33
+ "eval_micro_precision": 0.13203403788086743,
34
+ "eval_micro_recall": 0.13203403788086743,
35
+ "eval_runtime": 8.4066,
36
+ "eval_samples_per_second": 433.352,
37
+ "eval_steps_per_second": 3.45,
38
+ "eval_weighted_f1": 0.054356566877609806,
39
+ "eval_weighted_precision": 0.06787536777484662,
40
+ "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  }
43
  ],
checkpoint-64/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b95a5acea4810d62c8e91883a28ffee70ed6514bb963bcbeff5fd3d711ff1f10
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e71729c412a7000ba3fbaf09cdfcefe3e297c814b16c8153fc6dcab8efa3a04
3
  size 1213526036
checkpoint-64/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13999451001921492,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
@@ -10,34 +10,34 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 12.836527824401855,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.7815,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 6.243555545806885,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 4.9108,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13999451001921492,
28
- "eval_loss": 5.21905517578125,
29
- "eval_macro_f1": 0.04648934733573823,
30
- "eval_macro_precision": 0.03716366453858833,
31
- "eval_macro_recall": 0.09867228278248098,
32
- "eval_micro_f1": 0.13999451001921492,
33
- "eval_micro_precision": 0.13999451001921492,
34
- "eval_micro_recall": 0.13999451001921492,
35
- "eval_runtime": 8.2153,
36
- "eval_samples_per_second": 443.44,
37
- "eval_steps_per_second": 3.53,
38
- "eval_weighted_f1": 0.0724726180615966,
39
- "eval_weighted_precision": 0.06178867753876749,
40
- "eval_weighted_recall": 0.13999451001921492,
41
  "step": 35
42
  }
43
  ],
 
1
  {
2
+ "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13203403788086743,
28
+ "eval_loss": 5.00415563583374,
29
+ "eval_macro_f1": 0.03707599079404095,
30
+ "eval_macro_precision": 0.05261629127568818,
31
+ "eval_macro_recall": 0.06397651980374536,
32
+ "eval_micro_f1": 0.13203403788086743,
33
+ "eval_micro_precision": 0.13203403788086743,
34
+ "eval_micro_recall": 0.13203403788086743,
35
+ "eval_runtime": 8.4066,
36
+ "eval_samples_per_second": 433.352,
37
+ "eval_steps_per_second": 3.45,
38
+ "eval_weighted_f1": 0.054356566877609806,
39
+ "eval_weighted_precision": 0.06787536777484662,
40
+ "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  }
43
  ],
metrics.json CHANGED
@@ -1,8 +1,26 @@
1
  {
2
- "train_runtime": 163.0123,
3
- "train_samples_per_second": 201.126,
4
- "train_steps_per_second": 0.393,
5
- "total_flos": 8.978215898519175e+18,
6
- "train_loss": 4.277425870299339,
7
- "epoch": 0.9922480620155039
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  }
 
1
  {
2
+ "train": {
3
+ "train_runtime": 163.8077,
4
+ "train_samples_per_second": 200.149,
5
+ "train_steps_per_second": 0.391,
6
+ "total_flos": 8.978215898519175e+18,
7
+ "train_loss": 4.830539099872112,
8
+ "epoch": 0.9922480620155039
9
+ },
10
+ "eval": {
11
+ "eval_loss": 5.00415563583374,
12
+ "eval_accuracy": 0.13203403788086743,
13
+ "eval_weighted_f1": 0.054356566877609806,
14
+ "eval_micro_f1": 0.13203403788086743,
15
+ "eval_macro_f1": 0.03707599079404095,
16
+ "eval_weighted_recall": 0.13203403788086743,
17
+ "eval_micro_recall": 0.13203403788086743,
18
+ "eval_macro_recall": 0.06397651980374536,
19
+ "eval_weighted_precision": 0.06787536777484662,
20
+ "eval_micro_precision": 0.13203403788086743,
21
+ "eval_macro_precision": 0.05261629127568818,
22
+ "eval_runtime": 8.4066,
23
+ "eval_samples_per_second": 433.352,
24
+ "eval_steps_per_second": 3.45
25
+ }
26
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842438b2a71dff9bd8051dc5cd455a2ffda7d106f391a8ddd42424b10a27ec96
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40862837ac36823a66434d0e8fb1b8dcebd62b09379dec3f443c3d4180c26d6f
3
  size 1213526036
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13999451001921492,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
@@ -10,44 +10,44 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 12.836527824401855,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.7815,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 6.243555545806885,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 4.9108,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13999451001921492,
28
- "eval_loss": 5.21905517578125,
29
- "eval_macro_f1": 0.04648934733573823,
30
- "eval_macro_precision": 0.03716366453858833,
31
- "eval_macro_recall": 0.09867228278248098,
32
- "eval_micro_f1": 0.13999451001921492,
33
- "eval_micro_precision": 0.13999451001921492,
34
- "eval_micro_recall": 0.13999451001921492,
35
- "eval_runtime": 8.2153,
36
- "eval_samples_per_second": 443.44,
37
- "eval_steps_per_second": 3.53,
38
- "eval_weighted_f1": 0.0724726180615966,
39
- "eval_weighted_precision": 0.06178867753876749,
40
- "eval_weighted_recall": 0.13999451001921492,
41
  "step": 35
42
  },
43
  {
44
  "epoch": 0.9922480620155039,
45
  "step": 64,
46
  "total_flos": 8.978215898519175e+18,
47
- "train_loss": 4.277425870299339,
48
- "train_runtime": 163.0123,
49
- "train_samples_per_second": 201.126,
50
- "train_steps_per_second": 0.393
51
  }
52
  ],
53
  "logging_steps": 35,
 
1
  {
2
+ "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13203403788086743,
28
+ "eval_loss": 5.00415563583374,
29
+ "eval_macro_f1": 0.03707599079404095,
30
+ "eval_macro_precision": 0.05261629127568818,
31
+ "eval_macro_recall": 0.06397651980374536,
32
+ "eval_micro_f1": 0.13203403788086743,
33
+ "eval_micro_precision": 0.13203403788086743,
34
+ "eval_micro_recall": 0.13203403788086743,
35
+ "eval_runtime": 8.4066,
36
+ "eval_samples_per_second": 433.352,
37
+ "eval_steps_per_second": 3.45,
38
+ "eval_weighted_f1": 0.054356566877609806,
39
+ "eval_weighted_precision": 0.06787536777484662,
40
+ "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  },
43
  {
44
  "epoch": 0.9922480620155039,
45
  "step": 64,
46
  "total_flos": 8.978215898519175e+18,
47
+ "train_loss": 4.830539099872112,
48
+ "train_runtime": 163.8077,
49
+ "train_samples_per_second": 200.149,
50
+ "train_steps_per_second": 0.391
51
  }
52
  ],
53
  "logging_steps": 35,