Upload moe_idefics2.py
Browse files- moe_idefics2.py +0 -6
moe_idefics2.py
CHANGED
@@ -84,7 +84,6 @@ class ModifiedIdefics2DecoderLayer(nn.Module):
|
|
84 |
self.mlp = moe_layer
|
85 |
self.input_layernorm = original_layer.input_layernorm
|
86 |
self.post_attention_layernorm = original_layer.post_attention_layernorm
|
87 |
-
#print ("Init: ModifiedIdefics2DecoderLayer")
|
88 |
|
89 |
def forward(
|
90 |
self,
|
@@ -190,9 +189,6 @@ class Idefics2ForCausalLMMoE(Idefics2ForConditionalGeneration):
|
|
190 |
"AutoModelForCausalLM": "moe_idefics2.Idefics2ForCausalLMMoE",
|
191 |
}
|
192 |
|
193 |
-
self.use_embeddings_in_router=config.use_embeddings_in_router
|
194 |
-
print ("Use embeddigs in router: ", self.use_embeddings_in_router )
|
195 |
-
|
196 |
self.model = base_model or Idefics2ForConditionalGeneration(
|
197 |
self.config
|
198 |
)
|
@@ -200,9 +196,7 @@ class Idefics2ForCausalLMMoE(Idefics2ForConditionalGeneration):
|
|
200 |
if base_model and expert_models:
|
201 |
self.num_expert_models = len(expert_models)
|
202 |
self._init_moe_layers(base_model, expert_models, k, layer_dtype)
|
203 |
-
print ("CONSTRUCTOR self.model",self.model)
|
204 |
else:
|
205 |
-
|
206 |
print(
|
207 |
"Init function called and generating dummy experts: k=",
|
208 |
k,
|
|
|
84 |
self.mlp = moe_layer
|
85 |
self.input_layernorm = original_layer.input_layernorm
|
86 |
self.post_attention_layernorm = original_layer.post_attention_layernorm
|
|
|
87 |
|
88 |
def forward(
|
89 |
self,
|
|
|
189 |
"AutoModelForCausalLM": "moe_idefics2.Idefics2ForCausalLMMoE",
|
190 |
}
|
191 |
|
|
|
|
|
|
|
192 |
self.model = base_model or Idefics2ForConditionalGeneration(
|
193 |
self.config
|
194 |
)
|
|
|
196 |
if base_model and expert_models:
|
197 |
self.num_expert_models = len(expert_models)
|
198 |
self._init_moe_layers(base_model, expert_models, k, layer_dtype)
|
|
|
199 |
else:
|
|
|
200 |
print(
|
201 |
"Init function called and generating dummy experts: k=",
|
202 |
k,
|