MHGanainy commited on
Commit
3aed3bd
1 Parent(s): 468e32a

MHGanainy/8-clusters-balanced-lex-best-v2-2

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 1.5749374628067017,
4
- "eval_runtime": 30.5122,
5
- "eval_samples_per_second": 15.404,
6
- "eval_steps_per_second": 1.934,
7
- "perplexity": 4.83043952653401,
8
  "total_flos": 3.505428283392e+16,
9
- "train_loss": 1.8147928668926288,
10
- "train_runtime": 577.3896,
11
- "train_samples_per_second": 6.666,
12
- "train_steps_per_second": 3.334
13
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 1.5753202438354492,
4
+ "eval_runtime": 30.3994,
5
+ "eval_samples_per_second": 15.461,
6
+ "eval_steps_per_second": 1.941,
7
+ "perplexity": 4.832288881071616,
8
  "total_flos": 3.505428283392e+16,
9
+ "train_loss": 1.8155303717278815,
10
+ "train_runtime": 577.5445,
11
+ "train_samples_per_second": 6.664,
12
+ "train_steps_per_second": 3.333
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 1.5749374628067017,
4
- "eval_runtime": 30.5122,
5
- "eval_samples_per_second": 15.404,
6
- "eval_steps_per_second": 1.934,
7
- "perplexity": 4.83043952653401
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 1.5753202438354492,
4
+ "eval_runtime": 30.3994,
5
+ "eval_samples_per_second": 15.461,
6
+ "eval_steps_per_second": 1.941,
7
+ "perplexity": 4.832288881071616
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 3.505428283392e+16,
4
- "train_loss": 1.8147928668926288,
5
- "train_runtime": 577.3896,
6
- "train_samples_per_second": 6.666,
7
- "train_steps_per_second": 3.334
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 3.505428283392e+16,
4
+ "train_loss": 1.8155303717278815,
5
+ "train_runtime": 577.5445,
6
+ "train_samples_per_second": 6.664,
7
+ "train_steps_per_second": 3.333
8
  }
trainer_state.json CHANGED
@@ -10,153 +10,153 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.05194805194805195,
13
- "grad_norm": 0.17290189862251282,
14
  "learning_rate": 1.0416666666666668e-05,
15
- "loss": 2.3684,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.1038961038961039,
20
- "grad_norm": 0.3600994646549225,
21
  "learning_rate": 1.9998948413537803e-05,
22
- "loss": 2.3046,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.15584415584415584,
27
- "grad_norm": 0.5850092172622681,
28
  "learning_rate": 1.980895642046523e-05,
29
- "loss": 2.1318,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.2077922077922078,
34
- "grad_norm": 0.8237331509590149,
35
  "learning_rate": 1.929749789909194e-05,
36
- "loss": 1.9827,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.2597402597402597,
41
- "grad_norm": 0.8907209038734436,
42
  "learning_rate": 1.8481334754373765e-05,
43
- "loss": 1.9117,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.3116883116883117,
48
- "grad_norm": 0.9720600843429565,
49
  "learning_rate": 1.738721490131949e-05,
50
- "loss": 1.8286,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.36363636363636365,
55
- "grad_norm": 1.116287350654602,
56
  "learning_rate": 1.6050995662118095e-05,
57
- "loss": 1.7962,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.4155844155844156,
62
- "grad_norm": 1.459151268005371,
63
  "learning_rate": 1.4516468622893718e-05,
64
- "loss": 1.7518,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.4675324675324675,
69
- "grad_norm": 1.180469274520874,
70
  "learning_rate": 1.2833924462770364e-05,
71
- "loss": 1.7222,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.5194805194805194,
76
- "grad_norm": 1.3427335023880005,
77
  "learning_rate": 1.1058504789752234e-05,
78
- "loss": 1.7119,
79
  "step": 1000
80
  },
81
  {
82
  "epoch": 0.5714285714285714,
83
- "grad_norm": 0.9414641261100769,
84
  "learning_rate": 9.248394998298933e-06,
85
- "loss": 1.6849,
86
  "step": 1100
87
  },
88
  {
89
  "epoch": 0.6233766233766234,
90
- "grad_norm": 1.2423919439315796,
91
  "learning_rate": 7.462917373632123e-06,
92
- "loss": 1.6988,
93
  "step": 1200
94
  },
95
  {
96
  "epoch": 0.6753246753246753,
97
- "grad_norm": 1.192747712135315,
98
  "learning_rate": 5.760586936999395e-06,
99
- "loss": 1.6711,
100
  "step": 1300
101
  },
102
  {
103
  "epoch": 0.7272727272727273,
104
- "grad_norm": 1.2693215608596802,
105
  "learning_rate": 4.197193747202849e-06,
106
- "loss": 1.6736,
107
  "step": 1400
108
  },
109
  {
110
  "epoch": 0.7792207792207793,
111
- "grad_norm": 1.3713454008102417,
112
  "learning_rate": 2.8239745066550085e-06,
113
- "loss": 1.7001,
114
  "step": 1500
115
  },
116
  {
117
  "epoch": 0.8311688311688312,
118
- "grad_norm": 1.3934439420700073,
119
  "learning_rate": 1.6859333934694332e-06,
120
- "loss": 1.6551,
121
  "step": 1600
122
  },
123
  {
124
  "epoch": 0.8831168831168831,
125
- "grad_norm": 1.3122085332870483,
126
  "learning_rate": 8.20367150544884e-07,
127
- "loss": 1.6369,
128
  "step": 1700
129
  },
130
  {
131
  "epoch": 0.935064935064935,
132
- "grad_norm": 1.1316357851028442,
133
  "learning_rate": 2.5564276853764057e-07,
134
- "loss": 1.6674,
135
  "step": 1800
136
  },
137
  {
138
  "epoch": 0.987012987012987,
139
- "grad_norm": 1.294811487197876,
140
  "learning_rate": 1.026782141876237e-08,
141
- "loss": 1.6051,
142
  "step": 1900
143
  },
144
  {
145
  "epoch": 1.0,
146
- "eval_loss": 1.5749374628067017,
147
- "eval_runtime": 30.4803,
148
- "eval_samples_per_second": 15.42,
149
- "eval_steps_per_second": 1.936,
150
  "step": 1925
151
  },
152
  {
153
  "epoch": 1.0,
154
  "step": 1925,
155
  "total_flos": 3.505428283392e+16,
156
- "train_loss": 1.8147928668926288,
157
- "train_runtime": 577.3896,
158
- "train_samples_per_second": 6.666,
159
- "train_steps_per_second": 3.334
160
  }
161
  ],
162
  "logging_steps": 100,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.05194805194805195,
13
+ "grad_norm": 0.16665750741958618,
14
  "learning_rate": 1.0416666666666668e-05,
15
+ "loss": 2.3686,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.1038961038961039,
20
+ "grad_norm": 0.3539956212043762,
21
  "learning_rate": 1.9998948413537803e-05,
22
+ "loss": 2.3053,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.15584415584415584,
27
+ "grad_norm": 0.5798401236534119,
28
  "learning_rate": 1.980895642046523e-05,
29
+ "loss": 2.1333,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 0.2077922077922078,
34
+ "grad_norm": 0.8114813566207886,
35
  "learning_rate": 1.929749789909194e-05,
36
+ "loss": 1.985,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 0.2597402597402597,
41
+ "grad_norm": 0.8917730450630188,
42
  "learning_rate": 1.8481334754373765e-05,
43
+ "loss": 1.9134,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 0.3116883116883117,
48
+ "grad_norm": 0.9438478350639343,
49
  "learning_rate": 1.738721490131949e-05,
50
+ "loss": 1.8299,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 0.36363636363636365,
55
+ "grad_norm": 1.0963283777236938,
56
  "learning_rate": 1.6050995662118095e-05,
57
+ "loss": 1.797,
58
  "step": 700
59
  },
60
  {
61
  "epoch": 0.4155844155844156,
62
+ "grad_norm": 1.442098617553711,
63
  "learning_rate": 1.4516468622893718e-05,
64
+ "loss": 1.7525,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 0.4675324675324675,
69
+ "grad_norm": 1.1528605222702026,
70
  "learning_rate": 1.2833924462770364e-05,
71
+ "loss": 1.7228,
72
  "step": 900
73
  },
74
  {
75
  "epoch": 0.5194805194805194,
76
+ "grad_norm": 1.329001545906067,
77
  "learning_rate": 1.1058504789752234e-05,
78
+ "loss": 1.7123,
79
  "step": 1000
80
  },
81
  {
82
  "epoch": 0.5714285714285714,
83
+ "grad_norm": 0.9347788691520691,
84
  "learning_rate": 9.248394998298933e-06,
85
+ "loss": 1.6857,
86
  "step": 1100
87
  },
88
  {
89
  "epoch": 0.6233766233766234,
90
+ "grad_norm": 1.225298523902893,
91
  "learning_rate": 7.462917373632123e-06,
92
+ "loss": 1.6992,
93
  "step": 1200
94
  },
95
  {
96
  "epoch": 0.6753246753246753,
97
+ "grad_norm": 1.1854369640350342,
98
  "learning_rate": 5.760586936999395e-06,
99
+ "loss": 1.6715,
100
  "step": 1300
101
  },
102
  {
103
  "epoch": 0.7272727272727273,
104
+ "grad_norm": 1.2654165029525757,
105
  "learning_rate": 4.197193747202849e-06,
106
+ "loss": 1.6741,
107
  "step": 1400
108
  },
109
  {
110
  "epoch": 0.7792207792207793,
111
+ "grad_norm": 1.3753448724746704,
112
  "learning_rate": 2.8239745066550085e-06,
113
+ "loss": 1.7005,
114
  "step": 1500
115
  },
116
  {
117
  "epoch": 0.8311688311688312,
118
+ "grad_norm": 1.3942779302597046,
119
  "learning_rate": 1.6859333934694332e-06,
120
+ "loss": 1.6554,
121
  "step": 1600
122
  },
123
  {
124
  "epoch": 0.8831168831168831,
125
+ "grad_norm": 1.2966676950454712,
126
  "learning_rate": 8.20367150544884e-07,
127
+ "loss": 1.6371,
128
  "step": 1700
129
  },
130
  {
131
  "epoch": 0.935064935064935,
132
+ "grad_norm": 1.1420341730117798,
133
  "learning_rate": 2.5564276853764057e-07,
134
+ "loss": 1.6679,
135
  "step": 1800
136
  },
137
  {
138
  "epoch": 0.987012987012987,
139
+ "grad_norm": 1.289973497390747,
140
  "learning_rate": 1.026782141876237e-08,
141
+ "loss": 1.6056,
142
  "step": 1900
143
  },
144
  {
145
  "epoch": 1.0,
146
+ "eval_loss": 1.5753202438354492,
147
+ "eval_runtime": 30.4933,
148
+ "eval_samples_per_second": 15.413,
149
+ "eval_steps_per_second": 1.935,
150
  "step": 1925
151
  },
152
  {
153
  "epoch": 1.0,
154
  "step": 1925,
155
  "total_flos": 3.505428283392e+16,
156
+ "train_loss": 1.8155303717278815,
157
+ "train_runtime": 577.5445,
158
+ "train_samples_per_second": 6.664,
159
+ "train_steps_per_second": 3.333
160
  }
161
  ],
162
  "logging_steps": 100,