TroyDoesAI committed on
Commit
6b411d0
·
verified ·
1 Parent(s): ed8f326

1 epoch of specialization with 2 experts active instead of all 3 experts active for the MoE research experiment, now that it's performing well on the dataset without having completed a single epoch yet.

Browse files
config.json CHANGED
@@ -15,7 +15,7 @@
15
  "mlp_bias": false,
16
  "model_type": "mixtral",
17
  "num_attention_heads": 32,
18
- "num_experts_per_tok": 3,
19
  "num_hidden_layers": 32,
20
  "num_key_value_heads": 32,
21
  "num_local_experts": 3,
@@ -30,6 +30,6 @@
30
  "tie_word_embeddings": false,
31
  "torch_dtype": "bfloat16",
32
  "transformers_version": "4.44.0",
33
- "use_cache": true,
34
  "vocab_size": 32064
35
  }
 
15
  "mlp_bias": false,
16
  "model_type": "mixtral",
17
  "num_attention_heads": 32,
18
+ "num_experts_per_tok": 2,
19
  "num_hidden_layers": 32,
20
  "num_key_value_heads": 32,
21
  "num_local_experts": 3,
 
30
  "tie_word_embeddings": false,
31
  "torch_dtype": "bfloat16",
32
  "transformers_version": "4.44.0",
33
+ "use_cache": false,
34
  "vocab_size": 32064
35
  }
pytorch_model-00001-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c935f6852e670601986f7de698ab841e55b7c9527b6ef4479e02ce0ae1236667
3
  size 4991420771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:332a56d10060b0d62a8926c97bcf7299fc92df64157637a7037b79df1afcf3fb
3
  size 4991420771
pytorch_model-00002-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:357ff9c49f03aa20e34ed1b0d7654dd4445e277f98514e2eff9ba9777760ae14
3
  size 4995766167
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddba2c3fcd660853ad62bd343cb0f587a260afd34aeeb1190350f800c27410f1
3
  size 4995766167
pytorch_model-00003-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:344b766b760bcf5dbe7f3c7c29f585687c2478c664e3c7a11d196f3123ee3135
3
  size 4957997958
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286f9b911e1a0259aa28bc8031580c9be9e460217d86c45c85a69dbcc41863ad
3
  size 4957997958
pytorch_model-00004-of-00004.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb0906f27f74ece8b3e9df1bed33abe67c5c8ec398d4dc15529d6ec733e79e6e
3
  size 2361428548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:959b14d1183ba6ad54c9f543fa49e01b1f0cfc78f087cd13cc7a4b2a28839a38
3
  size 2361428548