jnmrr committed on
Commit
d3a7216
·
verified ·
1 Parent(s): aca0a54

Upload model with metrics - 2024-12-12 01:56

Browse files
README.md CHANGED
@@ -30,6 +30,30 @@ This model is trained for document classification using vision transformers (DiT
30
  * Learning Rate Schedule: cosine_with_restarts
31
  * Warmup Ratio: 0.1
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  ## Usage
34
 
35
  ```python
@@ -48,10 +72,3 @@ inputs = processor(image, return_tensors="pt")
48
  outputs = model(**inputs)
49
  predicted_label = outputs.logits.argmax(-1).item()
50
  ```
51
-
52
- ## Model Performance
53
-
54
- The model was evaluated on a held-out test set. The metrics above show the model's performance on both training and evaluation data. Key metrics to note:
55
- - Accuracy: Overall classification accuracy
56
- - F1 Score: Weighted average F1-score across all classes
57
- - Precision and Recall: Weighted averages across all classes
 
30
  * Learning Rate Schedule: cosine_with_restarts
31
  * Warmup Ratio: 0.1
32
 
33
+ ## Training and Evaluation Metrics
34
+ ### Training Metrics
35
+ * Loss: 5.2299
36
+ * Grad Norm: 5.5809
37
+ * Learning Rate: 0.0010
38
+ * Epoch: 0.5426
39
+ * Step: 35.0000
40
+
41
+ ### Evaluation Metrics
42
+ * Loss: 5.0066
43
+ * Accuracy: 0.1351
44
+ * Weighted F1: 0.0662
45
+ * Micro F1: 0.1351
46
+ * Macro F1: 0.0380
47
+ * Weighted Recall: 0.1351
48
+ * Micro Recall: 0.1351
49
+ * Macro Recall: 0.0646
50
+ * Weighted Precision: 0.0548
51
+ * Micro Precision: 0.1351
52
+ * Macro Precision: 0.0333
53
+ * Runtime: 8.2574
54
+ * Samples Per Second: 441.1820
55
+ * Steps Per Second: 3.5120
56
+
57
  ## Usage
58
 
59
  ```python
 
72
  outputs = model(**inputs)
73
  predicted_label = outputs.logits.argmax(-1).item()
74
  ```
 
 
 
 
 
 
 
checkpoint-35/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40862837ac36823a66434d0e8fb1b8dcebd62b09379dec3f443c3d4180c26d6f
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72a88f97c761c0a531cb086303001d50691997a2b12f53e5ba97ac052c09560
3
  size 1213526036
checkpoint-35/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.5426356589147286,
5
  "eval_steps": 35,
@@ -10,34 +10,34 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13203403788086743,
28
- "eval_loss": 5.00415563583374,
29
- "eval_macro_f1": 0.03707599079404095,
30
- "eval_macro_precision": 0.05261629127568818,
31
- "eval_macro_recall": 0.06397651980374536,
32
- "eval_micro_f1": 0.13203403788086743,
33
- "eval_micro_precision": 0.13203403788086743,
34
- "eval_micro_recall": 0.13203403788086743,
35
- "eval_runtime": 8.4066,
36
- "eval_samples_per_second": 433.352,
37
- "eval_steps_per_second": 3.45,
38
- "eval_weighted_f1": 0.054356566877609806,
39
- "eval_weighted_precision": 0.06787536777484662,
40
- "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  }
43
  ],
 
1
  {
2
+ "best_metric": 0.1350535273126544,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.5426356589147286,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 16.251585006713867,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 6.0665,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 5.580934524536133,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.2299,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.1350535273126544,
28
+ "eval_loss": 5.006561279296875,
29
+ "eval_macro_f1": 0.038001266460012215,
30
+ "eval_macro_precision": 0.033336047367919855,
31
+ "eval_macro_recall": 0.06460349403258135,
32
+ "eval_micro_f1": 0.1350535273126544,
33
+ "eval_micro_precision": 0.1350535273126544,
34
+ "eval_micro_recall": 0.1350535273126544,
35
+ "eval_runtime": 8.2574,
36
+ "eval_samples_per_second": 441.182,
37
+ "eval_steps_per_second": 3.512,
38
+ "eval_weighted_f1": 0.0662231556079451,
39
+ "eval_weighted_precision": 0.054814565244902975,
40
+ "eval_weighted_recall": 0.1350535273126544,
41
  "step": 35
42
  }
43
  ],
checkpoint-35/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6cc8ffd94025c9e00fadf213825a92f261358a56e78b613f233178b10436ef6
3
+ size 5368
checkpoint-64/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e71729c412a7000ba3fbaf09cdfcefe3e297c814b16c8153fc6dcab8efa3a04
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df2901e8559ae0de2bd216d039cc92f33736cca45db6be8c08de0bbfb4fdb663
3
  size 1213526036
checkpoint-64/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
@@ -10,34 +10,34 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13203403788086743,
28
- "eval_loss": 5.00415563583374,
29
- "eval_macro_f1": 0.03707599079404095,
30
- "eval_macro_precision": 0.05261629127568818,
31
- "eval_macro_recall": 0.06397651980374536,
32
- "eval_micro_f1": 0.13203403788086743,
33
- "eval_micro_precision": 0.13203403788086743,
34
- "eval_micro_recall": 0.13203403788086743,
35
- "eval_runtime": 8.4066,
36
- "eval_samples_per_second": 433.352,
37
- "eval_steps_per_second": 3.45,
38
- "eval_weighted_f1": 0.054356566877609806,
39
- "eval_weighted_precision": 0.06787536777484662,
40
- "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  }
43
  ],
 
1
  {
2
+ "best_metric": 0.1350535273126544,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 16.251585006713867,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 6.0665,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 5.580934524536133,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.2299,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.1350535273126544,
28
+ "eval_loss": 5.006561279296875,
29
+ "eval_macro_f1": 0.038001266460012215,
30
+ "eval_macro_precision": 0.033336047367919855,
31
+ "eval_macro_recall": 0.06460349403258135,
32
+ "eval_micro_f1": 0.1350535273126544,
33
+ "eval_micro_precision": 0.1350535273126544,
34
+ "eval_micro_recall": 0.1350535273126544,
35
+ "eval_runtime": 8.2574,
36
+ "eval_samples_per_second": 441.182,
37
+ "eval_steps_per_second": 3.512,
38
+ "eval_weighted_f1": 0.0662231556079451,
39
+ "eval_weighted_precision": 0.054814565244902975,
40
+ "eval_weighted_recall": 0.1350535273126544,
41
  "step": 35
42
  }
43
  ],
checkpoint-64/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6cc8ffd94025c9e00fadf213825a92f261358a56e78b613f233178b10436ef6
3
+ size 5368
metrics.json CHANGED
@@ -1,26 +1,25 @@
1
  {
2
  "train": {
3
- "train_runtime": 163.8077,
4
- "train_samples_per_second": 200.149,
5
- "train_steps_per_second": 0.391,
6
- "total_flos": 8.978215898519175e+18,
7
- "train_loss": 4.830539099872112,
8
- "epoch": 0.9922480620155039
9
  },
10
  "eval": {
11
- "eval_loss": 5.00415563583374,
12
- "eval_accuracy": 0.13203403788086743,
13
- "eval_weighted_f1": 0.054356566877609806,
14
- "eval_micro_f1": 0.13203403788086743,
15
- "eval_macro_f1": 0.03707599079404095,
16
- "eval_weighted_recall": 0.13203403788086743,
17
- "eval_micro_recall": 0.13203403788086743,
18
- "eval_macro_recall": 0.06397651980374536,
19
- "eval_weighted_precision": 0.06787536777484662,
20
- "eval_micro_precision": 0.13203403788086743,
21
- "eval_macro_precision": 0.05261629127568818,
22
- "eval_runtime": 8.4066,
23
- "eval_samples_per_second": 433.352,
24
- "eval_steps_per_second": 3.45
25
  }
26
  }
 
1
  {
2
  "train": {
3
+ "loss": 5.2299,
4
+ "grad_norm": 5.580934524536133,
5
+ "learning_rate": 0.0010275543423681622,
6
+ "epoch": 0.5426356589147286,
7
+ "step": 35.0
 
8
  },
9
  "eval": {
10
+ "eval_loss": 5.006561279296875,
11
+ "eval_accuracy": 0.1350535273126544,
12
+ "eval_weighted_f1": 0.0662231556079451,
13
+ "eval_micro_f1": 0.1350535273126544,
14
+ "eval_macro_f1": 0.038001266460012215,
15
+ "eval_weighted_recall": 0.1350535273126544,
16
+ "eval_micro_recall": 0.1350535273126544,
17
+ "eval_macro_recall": 0.06460349403258135,
18
+ "eval_weighted_precision": 0.054814565244902975,
19
+ "eval_micro_precision": 0.1350535273126544,
20
+ "eval_macro_precision": 0.033336047367919855,
21
+ "eval_runtime": 8.2574,
22
+ "eval_samples_per_second": 441.182,
23
+ "eval_steps_per_second": 3.512
24
  }
25
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40862837ac36823a66434d0e8fb1b8dcebd62b09379dec3f443c3d4180c26d6f
3
  size 1213526036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72a88f97c761c0a531cb086303001d50691997a2b12f53e5ba97ac052c09560
3
  size 1213526036
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.13203403788086743,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
@@ -10,44 +10,44 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
- "grad_norm": 13.281591415405273,
14
  "learning_rate": 0.0002857142857142857,
15
- "loss": 5.8916,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
- "grad_norm": 4.951363563537598,
21
  "learning_rate": 0.0010275543423681622,
22
- "loss": 5.1595,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
- "eval_accuracy": 0.13203403788086743,
28
- "eval_loss": 5.00415563583374,
29
- "eval_macro_f1": 0.03707599079404095,
30
- "eval_macro_precision": 0.05261629127568818,
31
- "eval_macro_recall": 0.06397651980374536,
32
- "eval_micro_f1": 0.13203403788086743,
33
- "eval_micro_precision": 0.13203403788086743,
34
- "eval_micro_recall": 0.13203403788086743,
35
- "eval_runtime": 8.4066,
36
- "eval_samples_per_second": 433.352,
37
- "eval_steps_per_second": 3.45,
38
- "eval_weighted_f1": 0.054356566877609806,
39
- "eval_weighted_precision": 0.06787536777484662,
40
- "eval_weighted_recall": 0.13203403788086743,
41
  "step": 35
42
  },
43
  {
44
  "epoch": 0.9922480620155039,
45
  "step": 64,
46
  "total_flos": 8.978215898519175e+18,
47
- "train_loss": 4.830539099872112,
48
- "train_runtime": 163.8077,
49
- "train_samples_per_second": 200.149,
50
- "train_steps_per_second": 0.391
51
  }
52
  ],
53
  "logging_steps": 35,
 
1
  {
2
+ "best_metric": 0.1350535273126544,
3
  "best_model_checkpoint": "ds3-img-classification/checkpoint-35",
4
  "epoch": 0.9922480620155039,
5
  "eval_steps": 35,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.015503875968992248,
13
+ "grad_norm": 16.251585006713867,
14
  "learning_rate": 0.0002857142857142857,
15
+ "loss": 6.0665,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5426356589147286,
20
+ "grad_norm": 5.580934524536133,
21
  "learning_rate": 0.0010275543423681622,
22
+ "loss": 5.2299,
23
  "step": 35
24
  },
25
  {
26
  "epoch": 0.5426356589147286,
27
+ "eval_accuracy": 0.1350535273126544,
28
+ "eval_loss": 5.006561279296875,
29
+ "eval_macro_f1": 0.038001266460012215,
30
+ "eval_macro_precision": 0.033336047367919855,
31
+ "eval_macro_recall": 0.06460349403258135,
32
+ "eval_micro_f1": 0.1350535273126544,
33
+ "eval_micro_precision": 0.1350535273126544,
34
+ "eval_micro_recall": 0.1350535273126544,
35
+ "eval_runtime": 8.2574,
36
+ "eval_samples_per_second": 441.182,
37
+ "eval_steps_per_second": 3.512,
38
+ "eval_weighted_f1": 0.0662231556079451,
39
+ "eval_weighted_precision": 0.054814565244902975,
40
+ "eval_weighted_recall": 0.1350535273126544,
41
  "step": 35
42
  },
43
  {
44
  "epoch": 0.9922480620155039,
45
  "step": 64,
46
  "total_flos": 8.978215898519175e+18,
47
+ "train_loss": 4.950647212564945,
48
+ "train_runtime": 163.8995,
49
+ "train_samples_per_second": 200.037,
50
+ "train_steps_per_second": 0.39
51
  }
52
  ],
53
  "logging_steps": 35,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f20e468d961bf3f2e3584ef964e567145fd00284f9814774d5cd6ba9208eb7fe
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6cc8ffd94025c9e00fadf213825a92f261358a56e78b613f233178b10436ef6
3
  size 5368