yixinsong committed
Commit e36ff24 · 1 Parent(s): ac8cfe2
config.json CHANGED
@@ -1,149 +1,3 @@
- {
-   "architectures": [
-     "SmallThinkerForCausalLM"
-   ],
-   "attention_dropout": 0.0,
-   "auto_map": {
-     "AutoConfig": "configuration_smallthinker.SmallThinkerConfig",
-     "AutoModel": "modeling_smallthinker.SmallThinkerForCausalLM",
-     "AutoModelForCausalLM": "modeling_smallthinker.SmallThinkerForCausalLM"
-   },
-   "bos_token_id": 151643,
-   "eos_token_id": [151643, 151645],
-   "head_dim": 128,
-   "hidden_size": 2560,
-   "initializer_range": 0.02,
-   "max_length": null,
-   "max_position_embeddings": 16384,
-   "model_name": "smallthinker_21b_instruct",
-   "moe_ffn_hidden_size": 768,
-   "moe_num_active_primary_experts": 6,
-   "moe_num_primary_experts": 64,
-   "moe_primary_router_apply_softmax": true,
-   "norm_topk_prob": true,
-   "num_attention_heads": 28,
-   "num_hidden_layers": 52,
-   "num_key_value_heads": 4,
-   "output_router_logits": false,
-   "repetition_penalty": null,
-   "rms_norm_eps": 1e-06,
-   "rope_layout": [0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                   0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                   0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                   0, 1, 1, 1],
-   "rope_scaling": null,
-   "rope_theta": 1.5e6,
-   "sliding_window_layout": [0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                             0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                             0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                             0, 1, 1, 1],
-   "sliding_window_size": 4096,
-   "temperature": null,
-   "tie_word_embeddings": false,
-   "tokenizer_class": "Qwen2Tokenizer",
-   "top_p": null,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.53.3",
-   "use_cache": true,
-   "vocab_size": 151936
- }
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3c9a25e61f6d17f058c9b1d0b931813c1fc316e0be592717d7f94e4d863a44d
+ size 1980
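For orientation, the config.json removed here described the model inline: 52 hidden layers, hidden size 2560, 28 attention heads over 4 KV heads, and an MoE block with 64 primary experts of which 6 are active per token; the rope_layout and sliding_window_layout arrays repeat a [0, 1, 1, 1] pattern across the 52 layers. Because auto_map routes to the custom configuration_smallthinker / modeling_smallthinker code, loading through Transformers needs trust_remote_code=True. A minimal sketch follows; the repo id is a placeholder assumption, not something stated in this commit.

    # Hypothetical loading sketch -- the repo id below is an assumption.
    from transformers import AutoConfig, AutoModelForCausalLM

    repo_id = "PowerInfer/SmallThinker-21BA3B-Instruct"  # placeholder Hub path for this repository

    # trust_remote_code=True is required because auto_map points at the custom
    # configuration_smallthinker.SmallThinkerConfig and
    # modeling_smallthinker.SmallThinkerForCausalLM classes shipped with the repo.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
    print(config.num_hidden_layers)        # 52
    print(config.moe_num_primary_experts)  # 64 experts, 6 active per token

    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype="auto",        # the config declares bfloat16
        trust_remote_code=True,
    )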
generation_config.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9b600e7be2f77f42d53fbe61ca20257e5b2a684ec9ff43441427f6920a41b5b
+ size 143
model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b44595f69e2ddaa32c97351da99f9d215ed6b48371c1f0b644349cbc8b68104
+ size 1009867
tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a1b4ff97cb6413e8749f83390675d515ddb252b8c7dac89ffb597f826ffa428
+ size 7269
vocab.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
+ size 2776833
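After this commit, config.json and each newly added JSON file are stored as Git LFS pointer stubs: three-line text files recording the LFS spec version, the sha256 of the actual payload, and its size in bytes. As an illustrative sketch (file paths are hypothetical, and it assumes the real blobs were fetched separately, e.g. with git lfs pull), a stub can be parsed and a downloaded file checked against it:

    # Sketch: parse a Git LFS pointer stub and verify a locally downloaded blob against it.
    # Paths are examples only; the pointer format is the standard git-lfs spec v1 shown above.
    import hashlib
    from pathlib import Path

    def parse_lfs_pointer(path: str) -> dict:
        """Read 'key value' lines such as 'oid sha256:<hex>' and 'size <bytes>'."""
        fields = {}
        for line in Path(path).read_text().splitlines():
            key, _, value = line.partition(" ")
            fields[key] = value
        return fields

    def verify_blob(pointer_path: str, blob_path: str) -> bool:
        pointer = parse_lfs_pointer(pointer_path)
        expected_oid = pointer["oid"].removeprefix("sha256:")
        expected_size = int(pointer["size"])
        data = Path(blob_path).read_bytes()
        return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

    # Example with hypothetical paths: the stub committed here vs. the resolved file.
    # print(verify_blob("config.json.pointer", "config.json"))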