yixinsong committed on
Commit
1040800
·
1 Parent(s): 3bfc484

upload config

Browse files
Files changed (1) hide show
  1. config.json +149 -0
config.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SmallThinkerForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_smallthinker.SmallThinkerConfig",
8
+ "AutoModel": "modeling_smallthinker.SmallThinkerForCausalLM",
9
+ "AutoModelForCausalLM": "modeling_smallthinker.SmallThinkerForCausalLM"
10
+ },
11
+ "bos_token_id": 151643,
12
+ "eos_token_id": [151643, 151645],
13
+ "head_dim": 128,
14
+ "hidden_size": 2560,
15
+ "initializer_range": 0.02,
16
+ "max_length": null,
17
+ "max_position_embeddings": 16384,
18
+ "model_name": "smallthinker_21b_instruct",
19
+ "moe_ffn_hidden_size": 768,
20
+ "moe_num_active_primary_experts": 6,
21
+ "moe_num_primary_experts": 64,
22
+ "moe_primary_router_apply_softmax": true,
23
+ "norm_topk_prob": true,
24
+ "num_attention_heads": 28,
25
+ "num_hidden_layers": 52,
26
+ "num_key_value_heads": 4,
27
+ "output_router_logits": false,
28
+ "repetition_penalty": null,
29
+ "rms_norm_eps": 1e-06,
30
+ "rope_layout": [
31
+ 0,
32
+ 1,
33
+ 1,
34
+ 1,
35
+ 0,
36
+ 1,
37
+ 1,
38
+ 1,
39
+ 0,
40
+ 1,
41
+ 1,
42
+ 1,
43
+ 0,
44
+ 1,
45
+ 1,
46
+ 1,
47
+ 0,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 0,
52
+ 1,
53
+ 1,
54
+ 1,
55
+ 0,
56
+ 1,
57
+ 1,
58
+ 1,
59
+ 0,
60
+ 1,
61
+ 1,
62
+ 1,
63
+ 0,
64
+ 1,
65
+ 1,
66
+ 1,
67
+ 0,
68
+ 1,
69
+ 1,
70
+ 1,
71
+ 0,
72
+ 1,
73
+ 1,
74
+ 1,
75
+ 0,
76
+ 1,
77
+ 1,
78
+ 1,
79
+ 0,
80
+ 1,
81
+ 1,
82
+ 1
83
+ ],
84
+ "rope_scaling": null,
85
+ "rope_theta": 1.5e6,
86
+ "sliding_window_layout": [
87
+ 0,
88
+ 1,
89
+ 1,
90
+ 1,
91
+ 0,
92
+ 1,
93
+ 1,
94
+ 1,
95
+ 0,
96
+ 1,
97
+ 1,
98
+ 1,
99
+ 0,
100
+ 1,
101
+ 1,
102
+ 1,
103
+ 0,
104
+ 1,
105
+ 1,
106
+ 1,
107
+ 0,
108
+ 1,
109
+ 1,
110
+ 1,
111
+ 0,
112
+ 1,
113
+ 1,
114
+ 1,
115
+ 0,
116
+ 1,
117
+ 1,
118
+ 1,
119
+ 0,
120
+ 1,
121
+ 1,
122
+ 1,
123
+ 0,
124
+ 1,
125
+ 1,
126
+ 1,
127
+ 0,
128
+ 1,
129
+ 1,
130
+ 1,
131
+ 0,
132
+ 1,
133
+ 1,
134
+ 1,
135
+ 0,
136
+ 1,
137
+ 1,
138
+ 1
139
+ ],
140
+ "sliding_window_size": 4096,
141
+ "temperature": null,
142
+ "tie_word_embeddings": false,
143
+ "tokenizer_class": "Qwen2Tokenizer",
144
+ "top_p": null,
145
+ "torch_dtype": "bfloat16",
146
+ "transformers_version": "4.53.3",
147
+ "use_cache": true,
148
+ "vocab_size": 151936
149
+ }