mengzhouxia committed
Commit 976f7b4 · 1 Parent(s): 1fd7dac

first commit

Files changed (4)
  1. config.json +188 -0
  2. pytorch_model.bin +3 -0
  3. tokenizer_config.json +1 -0
  4. vocab.txt +0 -0
config.json ADDED
@@ -0,0 +1,188 @@
+ {
+   "architectures": [
+     "NewBertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "decompose_qk": false,
+   "decompose_vo": false,
+   "do_distill": true,
+   "do_emb_distill": false,
+   "do_layer_distill": true,
+   "do_mha_distill": false,
+   "do_mha_layer_distill": false,
+   "finetuning_task": "sst-2",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "output_attentions": true,
+   "output_hidden_states": true,
+   "pad_token_id": 0,
+   "pruned_heads": {
+     "0": [
+       0,
+       4,
+       5,
+       6,
+       7,
+       8,
+       9
+     ],
+     "1": [
+       0,
+       1,
+       2,
+       3,
+       4,
+       5,
+       6,
+       7,
+       8,
+       9,
+       10,
+       11
+     ],
+     "2": [
+       0,
+       1,
+       2,
+       3,
+       4,
+       5,
+       6,
+       7,
+       8,
+       9,
+       10,
+       11
+     ],
+     "3": [
+       0,
+       1,
+       2,
+       3,
+       4,
+       5,
+       6,
+       7,
+       8,
+       9,
+       10,
+       11
+     ],
+     "4": [
+       0,
+       1,
+       2,
+       3,
+       4,
+       5,
+       6,
+       7,
+       8,
+       9,
+       10,
+       11
+     ],
+     "5": [
+       0,
+       1,
+       2,
+       3,
+       4,
+       5,
+       6,
+       7,
+       8,
+       9,
+       10,
+       11
+     ],
+     "6": [
+       0,
+       1,
+       2,
+       3,
+       4,
+       6,
+       7,
+       8,
+       10,
+       11
+     ],
+     "7": [
+       0,
+       1,
+       2,
+       3,
+       4,
+       5,
+       6,
+       7,
+       8,
+       9,
+       10,
+       11
+     ],
+     "8": [
+       0,
+       1,
+       2,
+       3,
+       4,
+       5,
+       6,
+       7,
+       8,
+       10
+     ],
+     "9": [
+       0,
+       1,
+       3,
+       4,
+       6,
+       7,
+       8,
+       9,
+       10,
+       11
+     ],
+     "10": [
+       0,
+       1,
+       2,
+       3,
+       5,
+       6,
+       7,
+       8,
+       10,
+       11
+     ],
+     "11": [
+       0,
+       1,
+       2,
+       3,
+       5,
+       6,
+       7,
+       8,
+       9,
+       11
+     ]
+   },
+   "qk_denominator": "ori",
+   "sephidden_pruned": false,
+   "transform_embedding": false,
+   "type_vocab_size": 2,
+   "vocab_size": 30522
+ }
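
The pruned_heads map above lists, per layer, the attention heads removed from the 12-layer, 12-head BERT base model; layers 1 through 5 and layer 7 have all of their heads pruned. A minimal sketch for reading that structure, assuming the file is available locally as config.json (it uses only the standard library, since NewBertForSequenceClassification is a custom class that is not part of this commit):

    import json

    # Read the configuration added in this commit (local path assumed).
    with open("config.json") as f:
        cfg = json.load(f)

    total_heads = cfg["num_attention_heads"]  # 12 heads per layer before pruning
    for layer in range(cfg["num_hidden_layers"]):
        pruned = cfg["pruned_heads"].get(str(layer), [])
        print(f"layer {layer:2d}: {total_heads - len(pruned)} heads remain")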
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21ce13174880a6b439ec788cbc77c8b6f5a4be81734c90b7878a3592fd1ae630
+ size 116237287
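
This file is stored with Git LFS, so the diff shows only the pointer (spec version, SHA-256 object id, and size in bytes) rather than the binary weights. A minimal check of a downloaded copy against the pointer might look like the following sketch, with the local path assumed:

    import hashlib
    import os

    path = "pytorch_model.bin"  # assumed location of the LFS-resolved file
    expected_oid = "21ce13174880a6b439ec788cbc77c8b6f5a4be81734c90b7878a3592fd1ae630"
    expected_size = 116237287

    sha = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)

    assert os.path.getsize(path) == expected_size, "size does not match the LFS pointer"
    assert sha.hexdigest() == expected_oid, "sha256 does not match the LFS pointer"
    print("pytorch_model.bin matches the LFS pointer")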
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "model_max_length": 512}
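
The tokenizer configuration only sets lower-casing and a 512-token maximum length. As an illustrative sketch (the repository path is a placeholder), loading the tokenizer from a local clone of this repo with transformers picks both values up:

    from transformers import BertTokenizer

    # "path/to/this/repo" is a placeholder for a local clone containing
    # vocab.txt and tokenizer_config.json.
    tokenizer = BertTokenizer.from_pretrained("path/to/this/repo")
    print(tokenizer.tokenize("Hello World"))  # lower-cased via do_lower_case
    print(tokenizer.model_max_length)         # 512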
vocab.txt ADDED
The diff for this file is too large to render. See raw diff