jnmrr committed on
Commit
aca0a54
·
verified ·
1 Parent(s): eec1cd1

Upload model with metrics - 2024-12-12 01:33

Browse files
README.md CHANGED
@@ -25,6 +25,10 @@ This model is trained for document classification using vision transformers (DiT
25
  * Learning Rate: 0.002
26
  * Number of Epochs: 1
27
  * Mixed Precision: BF16
 
 
 
 
28
 
29
  ## Usage
30
 
@@ -44,3 +48,10 @@ inputs = processor(image, return_tensors="pt")
44
  outputs = model(**inputs)
45
  predicted_label = outputs.logits.argmax(-1).item()
46
  ```
 
 
 
 
 
 
 
 
25
  * Learning Rate: 0.002
26
  * Number of Epochs: 1
27
  * Mixed Precision: BF16
28
+ * Gradient Accumulation Steps: 2
29
+ * Weight Decay: 0.01
30
+ * Learning Rate Schedule: cosine_with_restarts
31
+ * Warmup Ratio: 0.1
32
 
33
  ## Usage
34
 
 
48
  outputs = model(**inputs)
49
  predicted_label = outputs.logits.argmax(-1).item()
50
  ```
51
+
52
+ ## Model Performance
53
+
54
+ The model was evaluated on a held-out test set. The metrics above show the model's performance on both training and evaluation data. Key metrics to note:
55
+ - Accuracy: Overall classification accuracy
56
+ - F1 Score: Weighted average F1-score across all classes
57
+ - Precision and Recall: Weighted averages across all classes
checkpoint-35/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842438b2a71dff9bd8051dc5cd455a2ffda7d106f391a8ddd42424b10a27ec96
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40862837ac36823a66434d0e8fb1b8dcebd62b09379dec3f443c3d4180c26d6f
3
  size 1213526036
checkpoint-35/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13999451001921492,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.5426356589147286,
5
  "eval_steps": 35,
@@ -10,34 +10,34 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 12.836527824401855,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.7815,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 6.243555545806885,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 4.9108,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13999451001921492,
28
- "eval_loss": 5.21905517578125,
29
- "eval_macro_f1": 0.04648934733573823,
30
- "eval_macro_precision": 0.03716366453858833,
31
- "eval_macro_recall": 0.09867228278248098,
32
- "eval_micro_f1": 0.13999451001921492,
33
- "eval_micro_precision": 0.13999451001921492,
34
- "eval_micro_recall": 0.13999451001921492,
35
- "eval_runtime": 8.2153,
36
- "eval_samples_per_second": 443.44,
37
- "eval_steps_per_second": 3.53,
38
- "eval_weighted_f1": 0.0724726180615966,
39
- "eval_weighted_precision": 0.06178867753876749,
40
- "eval_weighted_recall": 0.13999451001921492,
41
  "step": 35
42
  }
43
  ],
 
1
  {
2
+ "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.5426356589147286,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13203403788086743,
28
+ "eval_loss": 5.00415563583374,
29
+ "eval_macro_f1": 0.03707599079404095,
30
+ "eval_macro_precision": 0.05261629127568818,
31
+ "eval_macro_recall": 0.06397651980374536,
32
+ "eval_micro_f1": 0.13203403788086743,
33
+ "eval_micro_precision": 0.13203403788086743,
34
+ "eval_micro_recall": 0.13203403788086743,
35
+ "eval_runtime": 8.4066,
36
+ "eval_samples_per_second": 433.352,
37
+ "eval_steps_per_second": 3.45,
38
+ "eval_weighted_f1": 0.054356566877609806,
39
+ "eval_weighted_precision": 0.06787536777484662,
40
+ "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  }
43
  ],
checkpoint-64/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b95a5acea4810d62c8e91883a28ffee70ed6514bb963bcbeff5fd3d711ff1f10
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e71729c412a7000ba3fbaf09cdfcefe3e297c814b16c8153fc6dcab8efa3a04
3
  size 1213526036
checkpoint-64/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13999451001921492,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
@@ -10,34 +10,34 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 12.836527824401855,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.7815,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 6.243555545806885,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 4.9108,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13999451001921492,
28
- "eval_loss": 5.21905517578125,
29
- "eval_macro_f1": 0.04648934733573823,
30
- "eval_macro_precision": 0.03716366453858833,
31
- "eval_macro_recall": 0.09867228278248098,
32
- "eval_micro_f1": 0.13999451001921492,
33
- "eval_micro_precision": 0.13999451001921492,
34
- "eval_micro_recall": 0.13999451001921492,
35
- "eval_runtime": 8.2153,
36
- "eval_samples_per_second": 443.44,
37
- "eval_steps_per_second": 3.53,
38
- "eval_weighted_f1": 0.0724726180615966,
39
- "eval_weighted_precision": 0.06178867753876749,
40
- "eval_weighted_recall": 0.13999451001921492,
41
  "step": 35
42
  }
43
  ],
 
1
  {
2
+ "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13203403788086743,
28
+ "eval_loss": 5.00415563583374,
29
+ "eval_macro_f1": 0.03707599079404095,
30
+ "eval_macro_precision": 0.05261629127568818,
31
+ "eval_macro_recall": 0.06397651980374536,
32
+ "eval_micro_f1": 0.13203403788086743,
33
+ "eval_micro_precision": 0.13203403788086743,
34
+ "eval_micro_recall": 0.13203403788086743,
35
+ "eval_runtime": 8.4066,
36
+ "eval_samples_per_second": 433.352,
37
+ "eval_steps_per_second": 3.45,
38
+ "eval_weighted_f1": 0.054356566877609806,
39
+ "eval_weighted_precision": 0.06787536777484662,
40
+ "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  }
43
  ],
metrics.json CHANGED
@@ -1,8 +1,26 @@
1
  {
2
- "train_runtime": 163.0123,
3
- "train_samples_per_second": 201.126,
4
- "train_steps_per_second": 0.393,
5
- "total_flos": 8.978215898519175e+18,
6
- "train_loss": 4.277425870299339,
7
- "epoch": 0.9922480620155039
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  }
 
1
  {
2
+ "train": {
3
+ "train_runtime": 163.8077,
4
+ "train_samples_per_second": 200.149,
5
+ "train_steps_per_second": 0.391,
6
+ "total_flos": 8.978215898519175e+18,
7
+ "train_loss": 4.830539099872112,
8
+ "epoch": 0.9922480620155039
9
+ },
10
+ "eval": {
11
+ "eval_loss": 5.00415563583374,
12
+ "eval_accuracy": 0.13203403788086743,
13
+ "eval_weighted_f1": 0.054356566877609806,
14
+ "eval_micro_f1": 0.13203403788086743,
15
+ "eval_macro_f1": 0.03707599079404095,
16
+ "eval_weighted_recall": 0.13203403788086743,
17
+ "eval_micro_recall": 0.13203403788086743,
18
+ "eval_macro_recall": 0.06397651980374536,
19
+ "eval_weighted_precision": 0.06787536777484662,
20
+ "eval_micro_precision": 0.13203403788086743,
21
+ "eval_macro_precision": 0.05261629127568818,
22
+ "eval_runtime": 8.4066,
23
+ "eval_samples_per_second": 433.352,
24
+ "eval_steps_per_second": 3.45
25
+ }
26
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842438b2a71dff9bd8051dc5cd455a2ffda7d106f391a8ddd42424b10a27ec96
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40862837ac36823a66434d0e8fb1b8dcebd62b09379dec3f443c3d4180c26d6f
3
  size 1213526036
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13999451001921492,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
@@ -10,44 +10,44 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 12.836527824401855,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.7815,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 6.243555545806885,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 4.9108,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13999451001921492,
28
- "eval_loss": 5.21905517578125,
29
- "eval_macro_f1": 0.04648934733573823,
30
- "eval_macro_precision": 0.03716366453858833,
31
- "eval_macro_recall": 0.09867228278248098,
32
- "eval_micro_f1": 0.13999451001921492,
33
- "eval_micro_precision": 0.13999451001921492,
34
- "eval_micro_recall": 0.13999451001921492,
35
- "eval_runtime": 8.2153,
36
- "eval_samples_per_second": 443.44,
37
- "eval_steps_per_second": 3.53,
38
- "eval_weighted_f1": 0.0724726180615966,
39
- "eval_weighted_precision": 0.06178867753876749,
40
- "eval_weighted_recall": 0.13999451001921492,
41
  "step": 35
42
  },
43
  {
44
  "epoch": 0.9922480620155039,
45
  "step": 64,
46
  "total_flos": 8.978215898519175e+18,
47
- "train_loss": 4.277425870299339,
48
- "train_runtime": 163.0123,
49
- "train_samples_per_second": 201.126,
50
- "train_steps_per_second": 0.393
51
  }
52
  ],
53
  "logging_steps": 35,
 
1
  {
2
+ "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.13203403788086743,
28
+ "eval_loss": 5.00415563583374,
29
+ "eval_macro_f1": 0.03707599079404095,
30
+ "eval_macro_precision": 0.05261629127568818,
31
+ "eval_macro_recall": 0.06397651980374536,
32
+ "eval_micro_f1": 0.13203403788086743,
33
+ "eval_micro_precision": 0.13203403788086743,
34
+ "eval_micro_recall": 0.13203403788086743,
35
+ "eval_runtime": 8.4066,
36
+ "eval_samples_per_second": 433.352,
37
+ "eval_steps_per_second": 3.45,
38
+ "eval_weighted_f1": 0.054356566877609806,
39
+ "eval_weighted_precision": 0.06787536777484662,
40
+ "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  },
43
  {
44
  "epoch": 0.9922480620155039,
45
  "step": 64,
46
  "total_flos": 8.978215898519175e+18,
47
+ "train_loss": 4.830539099872112,
48
+ "train_runtime": 163.8077,
49
+ "train_samples_per_second": 200.149,
50
+ "train_steps_per_second": 0.391
51
  }
52
  ],
53
  "logging_steps": 35,