Aditi3004 committed on
Commit
dff885f
·
verified ·
1 Parent(s): a6f8775

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,13 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.78125,
4
- "eval_loss": 0.660746157169342,
5
- "eval_runtime": 2.4294,
6
- "eval_samples_per_second": 13.172,
7
- "eval_steps_per_second": 0.412,
8
- "total_flos": 5924677556367360.0,
9
- "train_loss": 2.0297837257385254,
10
- "train_runtime": 23.3143,
11
- "train_samples_per_second": 598.345,
12
- "train_steps_per_second": 4.289
13
  }
 
1
  {
2
+ "epoch": 13.0,
3
+ "total_flos": 7.702080823277568e+16,
4
+ "train_loss": 1.9811313824775891,
5
+ "train_runtime": 3450.3358,
6
+ "train_samples_per_second": 4.043,
7
+ "train_steps_per_second": 0.029
 
 
 
 
 
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6a62e27f85591dc752f0b37a26e314bfd7da38290e78757298ec3e5c5e16540
3
  size 94302952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:224b58631c5e2b3ba0ffaea37b124a52bd6753a485de45ac4e74c802c5e70a14
3
  size 94302952
runs/Jan09_05-37-46_a7a68c73dff9/events.out.tfevents.1736405236.a7a68c73dff9.381.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5fbdc71f0190782370124fdc5ad43317fc46f85528dccf70655d6273b1c8ef0
3
+ size 5518
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 5924677556367360.0,
4
- "train_loss": 2.0297837257385254,
5
- "train_runtime": 23.3143,
6
- "train_samples_per_second": 598.345,
7
- "train_steps_per_second": 4.289
8
  }
 
1
  {
2
+ "epoch": 13.0,
3
+ "total_flos": 7.702080823277568e+16,
4
+ "train_loss": 1.9811313824775891,
5
+ "train_runtime": 3450.3358,
6
+ "train_samples_per_second": 4.043,
7
+ "train_steps_per_second": 0.029
8
  }
trainer_state.json CHANGED
@@ -1,30 +1,159 @@
1
  {
2
- "best_metric": 0.6875,
3
- "best_model_checkpoint": "resnet-50-finetuned-eurosat/checkpoint-3",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 3,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6875,
14
- "eval_loss": 0.6569108963012695,
15
- "eval_runtime": 3.4447,
16
- "eval_samples_per_second": 9.29,
17
- "eval_steps_per_second": 0.29,
18
  "step": 3
19
  },
20
  {
21
- "epoch": 1.0,
22
- "step": 3,
23
- "total_flos": 5924677556367360.0,
24
- "train_loss": 2.0297837257385254,
25
- "train_runtime": 23.3143,
26
- "train_samples_per_second": 598.345,
27
- "train_steps_per_second": 4.289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  }
29
  ],
30
  "logging_steps": 10,
@@ -35,11 +164,11 @@
35
  "stateful_callbacks": {
36
  "EarlyStoppingCallback": {
37
  "args": {
38
- "early_stopping_patience": 0,
39
  "early_stopping_threshold": 0.0
40
  },
41
  "attributes": {
42
- "early_stopping_patience_counter": 0
43
  }
44
  },
45
  "TrainerControl": {
@@ -53,7 +182,7 @@
53
  "attributes": {}
54
  }
55
  },
56
- "total_flos": 5924677556367360.0,
57
  "train_batch_size": 32,
58
  "trial_name": null,
59
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5625,
3
+ "best_model_checkpoint": "resnet-50-finetuned-eurosat/checkpoint-9",
4
+ "epoch": 13.0,
5
  "eval_steps": 500,
6
+ "global_step": 39,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5,
14
+ "eval_loss": 0.6890198588371277,
15
+ "eval_runtime": 9.1578,
16
+ "eval_samples_per_second": 3.494,
17
+ "eval_steps_per_second": 0.109,
18
  "step": 3
19
  },
20
  {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.5,
23
+ "eval_loss": 0.6856250166893005,
24
+ "eval_runtime": 9.6892,
25
+ "eval_samples_per_second": 3.303,
26
+ "eval_steps_per_second": 0.103,
27
+ "step": 6
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_accuracy": 0.5625,
32
+ "eval_loss": 0.6796374917030334,
33
+ "eval_runtime": 8.1871,
34
+ "eval_samples_per_second": 3.909,
35
+ "eval_steps_per_second": 0.122,
36
+ "step": 9
37
+ },
38
+ {
39
+ "epoch": 3.4444444444444446,
40
+ "grad_norm": 6.238272666931152,
41
+ "learning_rate": 5e-05,
42
+ "loss": 2.1334,
43
+ "step": 10
44
+ },
45
+ {
46
+ "epoch": 4.0,
47
+ "eval_accuracy": 0.53125,
48
+ "eval_loss": 0.6711597442626953,
49
+ "eval_runtime": 11.5033,
50
+ "eval_samples_per_second": 2.782,
51
+ "eval_steps_per_second": 0.087,
52
+ "step": 12
53
+ },
54
+ {
55
+ "epoch": 5.0,
56
+ "eval_accuracy": 0.53125,
57
+ "eval_loss": 0.6669355630874634,
58
+ "eval_runtime": 9.588,
59
+ "eval_samples_per_second": 3.338,
60
+ "eval_steps_per_second": 0.104,
61
+ "step": 15
62
+ },
63
+ {
64
+ "epoch": 6.0,
65
+ "eval_accuracy": 0.53125,
66
+ "eval_loss": 0.6575976610183716,
67
+ "eval_runtime": 7.9262,
68
+ "eval_samples_per_second": 4.037,
69
+ "eval_steps_per_second": 0.126,
70
+ "step": 18
71
+ },
72
+ {
73
+ "epoch": 6.888888888888889,
74
+ "grad_norm": 4.825995445251465,
75
+ "learning_rate": 4.4444444444444447e-05,
76
+ "loss": 2.0715,
77
+ "step": 20
78
+ },
79
+ {
80
+ "epoch": 7.0,
81
+ "eval_accuracy": 0.53125,
82
+ "eval_loss": 0.6581233739852905,
83
+ "eval_runtime": 9.5086,
84
+ "eval_samples_per_second": 3.365,
85
+ "eval_steps_per_second": 0.105,
86
+ "step": 21
87
+ },
88
+ {
89
+ "epoch": 8.0,
90
+ "eval_accuracy": 0.5625,
91
+ "eval_loss": 0.6548225283622742,
92
+ "eval_runtime": 9.6182,
93
+ "eval_samples_per_second": 3.327,
94
+ "eval_steps_per_second": 0.104,
95
+ "step": 24
96
+ },
97
+ {
98
+ "epoch": 9.0,
99
+ "eval_accuracy": 0.5625,
100
+ "eval_loss": 0.6479971408843994,
101
+ "eval_runtime": 9.7014,
102
+ "eval_samples_per_second": 3.298,
103
+ "eval_steps_per_second": 0.103,
104
+ "step": 27
105
+ },
106
+ {
107
+ "epoch": 10.0,
108
+ "grad_norm": 2.082106828689575,
109
+ "learning_rate": 3.888888888888889e-05,
110
+ "loss": 1.8148,
111
+ "step": 30
112
+ },
113
+ {
114
+ "epoch": 10.0,
115
+ "eval_accuracy": 0.5625,
116
+ "eval_loss": 0.6456948518753052,
117
+ "eval_runtime": 7.9377,
118
+ "eval_samples_per_second": 4.031,
119
+ "eval_steps_per_second": 0.126,
120
+ "step": 30
121
+ },
122
+ {
123
+ "epoch": 11.0,
124
+ "eval_accuracy": 0.5625,
125
+ "eval_loss": 0.6377989649772644,
126
+ "eval_runtime": 9.4096,
127
+ "eval_samples_per_second": 3.401,
128
+ "eval_steps_per_second": 0.106,
129
+ "step": 33
130
+ },
131
+ {
132
+ "epoch": 12.0,
133
+ "eval_accuracy": 0.5625,
134
+ "eval_loss": 0.6368027925491333,
135
+ "eval_runtime": 8.4914,
136
+ "eval_samples_per_second": 3.769,
137
+ "eval_steps_per_second": 0.118,
138
+ "step": 36
139
+ },
140
+ {
141
+ "epoch": 13.0,
142
+ "eval_accuracy": 0.5625,
143
+ "eval_loss": 0.6324682235717773,
144
+ "eval_runtime": 9.2297,
145
+ "eval_samples_per_second": 3.467,
146
+ "eval_steps_per_second": 0.108,
147
+ "step": 39
148
+ },
149
+ {
150
+ "epoch": 13.0,
151
+ "step": 39,
152
+ "total_flos": 7.702080823277568e+16,
153
+ "train_loss": 1.9811313824775891,
154
+ "train_runtime": 3450.3358,
155
+ "train_samples_per_second": 4.043,
156
+ "train_steps_per_second": 0.029
157
  }
158
  ],
159
  "logging_steps": 10,
 
164
  "stateful_callbacks": {
165
  "EarlyStoppingCallback": {
166
  "args": {
167
+ "early_stopping_patience": 10,
168
  "early_stopping_threshold": 0.0
169
  },
170
  "attributes": {
171
+ "early_stopping_patience_counter": 10
172
  }
173
  },
174
  "TrainerControl": {
 
182
  "attributes": {}
183
  }
184
  },
185
+ "total_flos": 7.702080823277568e+16,
186
  "train_batch_size": 32,
187
  "trial_name": null,
188
  "trial_params": null