sara-nabhani committed
Commit 60cd22e · 1 Parent(s): 3cdf372

Training in progress, step 1000

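For context, "Training in progress, step N" commits like this one are what the `transformers` `Trainer` pushes automatically when `push_to_hub` is enabled, and the `last-checkpoint/` folder matches `hub_strategy="checkpoint"`, which uploads the latest checkpoint so training can be resumed. The training script itself is not part of this commit, so the configuration below is only a hedged sketch reconstructed from the values logged in `trainer_state.json` (evaluation every 200 steps, 10 epochs, F1 as the tracked metric); the output path is a placeholder.

```python
# Hypothetical sketch of a Trainer setup consistent with this commit history.
# Values marked "assumed" are inferred from trainer_state.json, not confirmed.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="results/ltp-roberta-large-default",  # placeholder path
    num_train_epochs=10,                  # matches "num_train_epochs": 10
    evaluation_strategy="steps",
    eval_steps=200,                       # assumed: eval entries appear every 200 steps
    save_steps=200,                       # assumed: checkpoint-200 ... checkpoint-1000
    logging_steps=200,
    load_best_model_at_end=True,
    metric_for_best_model="f1",           # assumed: best_metric tracks eval_f1
    push_to_hub=True,
    hub_strategy="checkpoint",            # pushes the latest checkpoint to last-checkpoint/
)

# trainer = Trainer(model=model, args=training_args, ...)
# trainer.train()  # each push produces a "Training in progress, step N" commit
```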
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f64eb00a81e36715c933630bbedd1e6f963107105001649122261384d496e16a
+oid sha256:3ccbcaebfbb76527ceb05727868c381237a1a0817d744ea9290958afffa2eabe
 size 2843370360
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b459f1fb342e2b0b281660cd52bb52d7ef3398b855eb214494208049233b930
+oid sha256:063bd6581e269cf7fc1f68d19960a05c6ea27758161676f7fbc5d4c6a0c03e31
 size 1421660981
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ce148be18af7e294e208c23394ce790567ade1b147c385e87109b039a2f3e4e
-size 14575
+oid sha256:b5f5203f955ff1ed135286a43aa26372be9c45be922fb743af25a7b85b6c2fbe
+size 14639
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8636707e906db799535a0e24d8b11f4682008d2f9dce23ea4add566cf191f0eb
+oid sha256:68a6f395aca12d068a355806ad25a39cc1cf9ac8d7df16949ceeaa9f78f9ba22
 size 627
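The checkpoint binaries in this commit (optimizer.pt, pytorch_model.bin, rng_state.pth, scheduler.pt) are stored through Git LFS, so the diff only shows the pointer files changing: each pointer records the spec version, the SHA-256 object id, and the byte size of the real artifact. A quick way to check that a downloaded file matches the pointer recorded here is to hash it locally; the snippet below is a minimal sketch, with the local path assumed and the oid/size copied from the optimizer.pt diff above.

```python
# Minimal sketch: check a checked-out LFS file against the oid/size recorded in this commit.
import hashlib
from pathlib import Path

EXPECTED_OID = "3ccbcaebfbb76527ceb05727868c381237a1a0817d744ea9290958afffa2eabe"
EXPECTED_SIZE = 2843370360
CHECKPOINT = Path("last-checkpoint/optimizer.pt")  # assumed local clone after `git lfs pull`

digest = hashlib.sha256()
with CHECKPOINT.open("rb") as fh:
    # Stream in 1 MiB chunks so the ~2.8 GB optimizer state never sits in memory at once.
    for chunk in iter(lambda: fh.read(1 << 20), b""):
        digest.update(chunk)

print("size matches:", CHECKPOINT.stat().st_size == EXPECTED_SIZE)
print("sha256 matches:", digest.hexdigest() == EXPECTED_OID)
```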
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 0.15828796157072217,
-  "best_model_checkpoint": "/home2/s5432073/language-tech-project/results/ltp-roberta-large-defaultltp-roberta-large-default-9/checkpoint-200",
-  "epoch": 1.183431952662722,
-  "global_step": 200,
+  "best_metric": 0.37526293741944416,
+  "best_model_checkpoint": "/home2/s5432073/language-tech-project/results/ltp-roberta-large-defaultltp-roberta-large-default-9/checkpoint-1000",
+  "epoch": 5.9171597633136095,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -43,11 +43,159 @@
       "eval_samples_per_second": 724.543,
       "eval_steps_per_second": 22.929,
       "step": 200
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 8.037383177570094e-06,
+      "loss": 0.3276,
+      "step": 400
+    },
+    {
+      "epoch": 2.37,
+      "eval_f1": 0.27070745440431776,
+      "eval_f1_all": [
+        0.3987730061349693,
+        0.37480314960629924,
+        0.0,
+        0.0,
+        0.6236125126135217,
+        0.05917159763313609,
+        0.3404255319148936,
+        0.0,
+        0.7367021276595745,
+        0.5967130214917826,
+        0.11518324607329843,
+        0.4112149532710281,
+        0.0,
+        0.0,
+        0.15251798561151078,
+        0.0,
+        0.6607431340872374,
+        0.624390243902439,
+        0.017621145374449337,
+        0.30227743271221535
+      ],
+      "eval_loss": 0.3264588415622711,
+      "eval_runtime": 2.8315,
+      "eval_samples_per_second": 669.617,
+      "eval_steps_per_second": 21.19,
+      "step": 400
+    },
+    {
+      "epoch": 3.55,
+      "learning_rate": 6.791277258566978e-06,
+      "loss": 0.2941,
+      "step": 600
+    },
+    {
+      "epoch": 3.55,
+      "eval_f1": 0.3284303777422183,
+      "eval_f1_all": [
+        0.5230769230769231,
+        0.5660847880299252,
+        0.014388489208633093,
+        0.09090909090909091,
+        0.6258776328986961,
+        0.0588235294117647,
+        0.2923976608187135,
+        0.0,
+        0.7289595758780648,
+        0.6356589147286822,
+        0.28971962616822433,
+        0.40816326530612246,
+        0.0,
+        0.015625,
+        0.47457627118644063,
+        0.06382978723404255,
+        0.6955153422501967,
+        0.635897435897436,
+        0.09795918367346938,
+        0.3511450381679389
+      ],
+      "eval_loss": 0.31264743208885193,
+      "eval_runtime": 2.8103,
+      "eval_samples_per_second": 674.672,
+      "eval_steps_per_second": 21.35,
+      "step": 600
+    },
+    {
+      "epoch": 4.73,
+      "learning_rate": 5.545171339563863e-06,
+      "loss": 0.2732,
+      "step": 800
+    },
+    {
+      "epoch": 4.73,
+      "eval_f1": 0.3619965683236828,
+      "eval_f1_all": [
+        0.5528089887640449,
+        0.5821596244131456,
+        0.0945945945945946,
+        0.18965517241379312,
+        0.6472868217054263,
+        0.13829787234042554,
+        0.3692307692307692,
+        0.0,
+        0.7455386649041639,
+        0.638095238095238,
+        0.37606837606837606,
+        0.4579025110782866,
+        0.0,
+        0.015503875968992248,
+        0.5087179487179486,
+        0.07719298245614035,
+        0.6716917922948074,
+        0.6666666666666667,
+        0.08298755186721991,
+        0.425531914893617
+      ],
+      "eval_loss": 0.3080659508705139,
+      "eval_runtime": 2.7772,
+      "eval_samples_per_second": 682.698,
+      "eval_steps_per_second": 21.604,
+      "step": 800
+    },
+    {
+      "epoch": 5.92,
+      "learning_rate": 4.299065420560748e-06,
+      "loss": 0.2559,
+      "step": 1000
+    },
+    {
+      "epoch": 5.92,
+      "eval_f1": 0.37526293741944416,
+      "eval_f1_all": [
+        0.5243619489559165,
+        0.5671641791044777,
+        0.06896551724137931,
+        0.1724137931034483,
+        0.6521327014218009,
+        0.11363636363636363,
+        0.3711340206185567,
+        0.0,
+        0.7530312699425653,
+        0.6465116279069768,
+        0.42857142857142855,
+        0.5034387895460799,
+        0.0,
+        0.015384615384615385,
+        0.5711743772241993,
+        0.17629179331306988,
+        0.6535162950257289,
+        0.6972477064220184,
+        0.11290322580645162,
+        0.4773790951638066
+      ],
+      "eval_loss": 0.30820420384407043,
+      "eval_runtime": 2.6936,
+      "eval_samples_per_second": 703.901,
+      "eval_steps_per_second": 22.275,
+      "step": 1000
     }
   ],
   "max_steps": 1690,
   "num_train_epochs": 10,
-  "total_flos": 1089865359765408.0,
+  "total_flos": 5502978001057296.0,
   "trial_name": null,
   "trial_params": null
 }
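The `trainer_state.json` diff above is where the training progress actually shows: `log_history` gains one loss entry and one evaluation entry per 200-step interval, and `eval_f1` (which matches the mean of the 20 per-label scores in `eval_f1_all`) climbs from about 0.27 at step 400 to 0.375 at step 1000, which is why `best_model_checkpoint` moves from `checkpoint-200` to `checkpoint-1000`. A small sketch for pulling that progression out of the file follows; the path assumes a local checkout of this repository.

```python
# Sketch: summarize the evaluation history recorded in trainer_state.json.
import json
from pathlib import Path

state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global_step:", state["global_step"])
print("best_metric:", state["best_metric"])
print("best_model_checkpoint:", state["best_model_checkpoint"])

for entry in state["log_history"]:
    if "eval_f1" in entry:
        # eval_f1 is the mean of the per-label F1 scores listed in eval_f1_all
        mean_f1 = sum(entry["eval_f1_all"]) / len(entry["eval_f1_all"])
        print(f"step {entry['step']:>5}: eval_f1={entry['eval_f1']:.4f} "
              f"(mean per-label F1: {mean_f1:.4f}), eval_loss={entry['eval_loss']:.4f}")
```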
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b459f1fb342e2b0b281660cd52bb52d7ef3398b855eb214494208049233b930
+oid sha256:063bd6581e269cf7fc1f68d19960a05c6ea27758161676f7fbc5d4c6a0c03e31
 size 1421660981