Update modeling_Llamoe.py
Browse files
- modeling_Llamoe.py +3 -19
modeling_Llamoe.py
CHANGED
@@ -355,30 +355,14 @@ class LlamoeAttention(nn.Module):
|
|
355 |
|
356 |
def _init_rope(self):
|
357 |
if self.config.rope_scaling is None:
|
358 |
-
self.rotary_emb =
|
359 |
self.head_dim,
|
360 |
max_position_embeddings=self.max_position_embeddings,
|
361 |
base=self.rope_theta,
|
362 |
)
|
|
|
363 |
else:
|
364 |
-
|
365 |
-
scaling_factor = self.config.rope_scaling["factor"]
|
366 |
-
if scaling_type == "linear":
|
367 |
-
self.rotary_emb = LlamaLinearScalingRotaryEmbedding(
|
368 |
-
self.head_dim,
|
369 |
-
max_position_embeddings=self.max_position_embeddings,
|
370 |
-
scaling_factor=scaling_factor,
|
371 |
-
base=self.rope_theta,
|
372 |
-
)
|
373 |
-
elif scaling_type == "dynamic":
|
374 |
-
self.rotary_emb = LlamaDynamicNTKScalingRotaryEmbedding(
|
375 |
-
self.head_dim,
|
376 |
-
max_position_embeddings=self.max_position_embeddings,
|
377 |
-
scaling_factor=scaling_factor,
|
378 |
-
base=self.rope_theta,
|
379 |
-
)
|
380 |
-
else:
|
381 |
-
raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
|
382 |
|
383 |
def forward(
|
384 |
self,
|
|
|
355 |
|
356 |
def _init_rope(self):
|
357 |
if self.config.rope_scaling is None:
|
358 |
+
self.rotary_emb = LlamoeRotaryEmbedding(
|
359 |
self.head_dim,
|
360 |
max_position_embeddings=self.max_position_embeddings,
|
361 |
base=self.rope_theta,
|
362 |
)
|
363 |
+
|
364 |
else:
|
365 |
+
raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
|
367 |
def forward(
|
368 |
self,
|