yixinsong committed
Commit e36ff24 · 1 Parent(s): ac8cfe2
config.json CHANGED
@@ -1,149 +1,3 @@
- {
-   "architectures": [
-     "SmallThinkerForCausalLM"
-   ],
-   "attention_dropout": 0.0,
-   "auto_map": {
-     "AutoConfig": "configuration_smallthinker.SmallThinkerConfig",
-     "AutoModel": "modeling_smallthinker.SmallThinkerForCausalLM",
-     "AutoModelForCausalLM": "modeling_smallthinker.SmallThinkerForCausalLM"
-   },
-   "bos_token_id": 151643,
-   "eos_token_id": [151643, 151645],
-   "head_dim": 128,
-   "hidden_size": 2560,
-   "initializer_range": 0.02,
-   "max_length": null,
-   "max_position_embeddings": 16384,
-   "model_name": "smallthinker_21b_instruct",
-   "moe_ffn_hidden_size": 768,
-   "moe_num_active_primary_experts": 6,
-   "moe_num_primary_experts": 64,
-   "moe_primary_router_apply_softmax": true,
-   "norm_topk_prob": true,
-   "num_attention_heads": 28,
-   "num_hidden_layers": 52,
-   "num_key_value_heads": 4,
-   "output_router_logits": false,
-   "repetition_penalty": null,
-   "rms_norm_eps": 1e-06,
-   "rope_layout": [0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                   0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                   0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                   0, 1, 1, 1],
-   "rope_scaling": null,
-   "rope_theta": 1.5e6,
-   "sliding_window_layout": [0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                             0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                             0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
-                             0, 1, 1, 1],
-   "sliding_window_size": 4096,
-   "temperature": null,
-   "tie_word_embeddings": false,
-   "tokenizer_class": "Qwen2Tokenizer",
-   "top_p": null,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.53.3",
-   "use_cache": true,
-   "vocab_size": 151936
- }
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3c9a25e61f6d17f058c9b1d0b931813c1fc316e0be592717d7f94e4d863a44d
+ size 1980
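For orientation, the config.json removed here described the model inline: 52 hidden layers, hidden size 2560, 28 attention heads over 4 KV heads, and an MoE block with 64 primary experts of which 6 are active per token; the rope_layout and sliding_window_layout arrays repeat a [0, 1, 1, 1] pattern across the 52 layers. Because auto_map routes to the custom configuration_smallthinker / modeling_smallthinker code, loading through Transformers needs trust_remote_code=True. A minimal sketch follows; the repo id is a placeholder assumption, not something stated in this commit.

    # Hypothetical loading sketch -- the repo id below is an assumption.
    from transformers import AutoConfig, AutoModelForCausalLM

    repo_id = "PowerInfer/SmallThinker-21BA3B-Instruct"  # placeholder Hub path for this repository

    # trust_remote_code=True is required because auto_map points at the custom
    # configuration_smallthinker.SmallThinkerConfig and
    # modeling_smallthinker.SmallThinkerForCausalLM classes shipped with the repo.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
    print(config.num_hidden_layers)        # 52
    print(config.moe_num_primary_experts)  # 64 experts, 6 active per token

    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype="auto",        # the config declares bfloat16
        trust_remote_code=True,
    )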
generation_config.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9b600e7be2f77f42d53fbe61ca20257e5b2a684ec9ff43441427f6920a41b5b
+ size 143
model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b44595f69e2ddaa32c97351da99f9d215ed6b48371c1f0b644349cbc8b68104
+ size 1009867
tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a1b4ff97cb6413e8749f83390675d515ddb252b8c7dac89ffb597f826ffa428
+ size 7269
vocab.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
+ size 2776833
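After this commit, config.json and each newly added JSON file are stored as Git LFS pointer stubs: three-line text files recording the LFS spec version, the sha256 of the actual payload, and its size in bytes. As an illustrative sketch (file paths are hypothetical, and it assumes the real blobs were fetched separately, e.g. with git lfs pull), a stub can be parsed and a downloaded file checked against it:

    # Sketch: parse a Git LFS pointer stub and verify a locally downloaded blob against it.
    # Paths are examples only; the pointer format is the standard git-lfs spec v1 shown above.
    import hashlib
    from pathlib import Path

    def parse_lfs_pointer(path: str) -> dict:
        """Read 'key value' lines such as 'oid sha256:<hex>' and 'size <bytes>'."""
        fields = {}
        for line in Path(path).read_text().splitlines():
            key, _, value = line.partition(" ")
            fields[key] = value
        return fields

    def verify_blob(pointer_path: str, blob_path: str) -> bool:
        pointer = parse_lfs_pointer(pointer_path)
        expected_oid = pointer["oid"].removeprefix("sha256:")
        expected_size = int(pointer["size"])
        data = Path(blob_path).read_bytes()
        return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

    # Example with hypothetical paths: the stub committed here vs. the resolved file.
    # print(verify_blob("config.json.pointer", "config.json"))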