Correct name of MixtralBlockSparseTop2MLP (L -> l) (#1667)
Browse files
src/axolotl/monkeypatch/mixtral/__init__.py
CHANGED
@@ -42,9 +42,9 @@ def patch_mixtral_moe_forward_zero3() -> None:
|
|
42 |
return final_hidden_states, router_logits
|
43 |
|
44 |
from transformers.models.mixtral.modeling_mixtral import (
|
45 |
-
MixtralBLockSparseTop2MLP,
|
46 |
MixtralSparseMoeBlock,
|
47 |
)
|
48 |
|
49 |
-
MixtralBLockSparseTop2MLP.forward = mlp_forward
|
50 |
MixtralSparseMoeBlock.forward = moe_forward
|
|
|
42 |
return final_hidden_states, router_logits
|
43 |
|
44 |
from transformers.models.mixtral.modeling_mixtral import (
|
45 |
+
MixtralBlockSparseTop2MLP,
|
46 |
MixtralSparseMoeBlock,
|
47 |
)
|
48 |
|
49 |
+
MixtralBlockSparseTop2MLP.forward = mlp_forward
|
50 |
MixtralSparseMoeBlock.forward = moe_forward
|