drbh (HF Staff) committed · verified
Commit eba2c2c · Parent: 76c7de7

Build from kernel-builder job

Files changed (27):
  1. build/torch26-cxx11-cu118-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  2. build/torch26-cxx11-cu118-x86_64-linux/megablocks/_ops.py +3 -3
  3. build/torch26-cxx11-cu118-x86_64-linux/megablocks/layers.py +2 -2
  4. build/torch26-cxx11-cu124-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  5. build/torch26-cxx11-cu124-x86_64-linux/megablocks/_ops.py +3 -3
  6. build/torch26-cxx11-cu124-x86_64-linux/megablocks/layers.py +2 -2
  7. build/torch26-cxx11-cu126-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  8. build/torch26-cxx11-cu126-x86_64-linux/megablocks/_ops.py +3 -3
  9. build/torch26-cxx11-cu126-x86_64-linux/megablocks/layers.py +2 -2
  10. build/torch26-cxx98-cu118-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  11. build/torch26-cxx98-cu118-x86_64-linux/megablocks/_ops.py +3 -3
  12. build/torch26-cxx98-cu118-x86_64-linux/megablocks/layers.py +2 -2
  13. build/torch26-cxx98-cu124-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  14. build/torch26-cxx98-cu124-x86_64-linux/megablocks/_ops.py +3 -3
  15. build/torch26-cxx98-cu124-x86_64-linux/megablocks/layers.py +2 -2
  16. build/torch26-cxx98-cu126-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  17. build/torch26-cxx98-cu126-x86_64-linux/megablocks/_ops.py +3 -3
  18. build/torch26-cxx98-cu126-x86_64-linux/megablocks/layers.py +2 -2
  19. build/torch27-cxx11-cu118-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  20. build/torch27-cxx11-cu118-x86_64-linux/megablocks/_ops.py +3 -3
  21. build/torch27-cxx11-cu118-x86_64-linux/megablocks/layers.py +2 -2
  22. build/torch27-cxx11-cu126-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  23. build/torch27-cxx11-cu126-x86_64-linux/megablocks/_ops.py +3 -3
  24. build/torch27-cxx11-cu126-x86_64-linux/megablocks/layers.py +2 -2
  25. build/torch27-cxx11-cu128-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so +3 -0
  26. build/torch27-cxx11-cu128-x86_64-linux/megablocks/_ops.py +3 -3
  27. build/torch27-cxx11-cu128-x86_64-linux/megablocks/layers.py +2 -2
build/torch26-cxx11-cu118-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c5605ba50f2661b9dc4c5609572323fb4f52787181109c5900c261c5e2bf602
+size 10517576
build/torch26-cxx11-cu118-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch26-cxx11-cu118-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)
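
Note: the layers.py change is the only behavioral difference in this build. The expert-parallel forward path is now taken only when an expert_parallel_group has actually been attached to the module, torch.distributed is initialized, and that group spans more than one rank, rather than keying off attribute presence and unconditionally using torch.distributed.group.WORLD. A standalone sketch of the new guard (the module argument stands in for a MegaBlocksMoeMLP instance; attribute names mirror the diff):

    import torch.distributed as dist

    def use_expert_parallel(module) -> bool:
        # Fall back to the local single-device forward path unless a process
        # group was explicitly set and distributed is actually running.
        group = getattr(module, "expert_parallel_group", None)
        return (
            group is not None
            and dist.is_initialized()
            and dist.get_world_size(group) > 1
        )

With this guard, single-process runs select forward_once, whereas the previous code could pick parallel_forward_once whenever the attribute existed, even without an initialized process group.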
build/torch26-cxx11-cu124-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12c21d6f72f90950adbda156534691dd753476a18719b416541e8d6920a173b4
+size 11869392
build/torch26-cxx11-cu124-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch26-cxx11-cu124-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)
build/torch26-cxx11-cu126-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db7e3b7c3c15af78fe9ef0ba50c33cb2cb988bdf5dfb1f46807b7871e7c8e70e
+size 11931048
build/torch26-cxx11-cu126-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch26-cxx11-cu126-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)
build/torch26-cxx98-cu118-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9971a30d397598ee0a58118b8cca337d142de1ca34404532dfda6328122ab11
+size 10510040
build/torch26-cxx98-cu118-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch26-cxx98-cu118-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)
build/torch26-cxx98-cu124-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c136e90b35e7fd43fcc4d987588f68b3f4cfea295a00f1fda343acc9c8848577
+size 11857920
build/torch26-cxx98-cu124-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch26-cxx98-cu124-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)
build/torch26-cxx98-cu126-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f893773ec7b8157a4531a57821807f5f27ac48ceaa695c342cc7a39ad318dc
+size 11927768
build/torch26-cxx98-cu126-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch26-cxx98-cu126-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)
build/torch27-cxx11-cu118-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aff7108245384777d22e9023ae3fd4cf2bcb0015a0938e314d556dbd3e59fe00
+size 10517816
build/torch27-cxx11-cu118-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch27-cxx11-cu118-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)
build/torch27-cxx11-cu126-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa1eeccba0a3a26435538a2aa87bc22a40c0201a79979872f6296af984e7bf1e
+size 11931080
build/torch27-cxx11-cu126-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch27-cxx11-cu126-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)
build/torch27-cxx11-cu128-x86_64-linux/megablocks/_megablocks_76c7de7.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64e2fd33ed4a5e9497ad304763c3c174ade26702a8e43fe8e7b3d3e79eb1e021
+size 17892624
build/torch27-cxx11-cu128-x86_64-linux/megablocks/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _megablocks_9a1816c
-ops = torch.ops._megablocks_9a1816c
+from . import _megablocks_76c7de7
+ops = torch.ops._megablocks_76c7de7
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_megablocks_9a1816c::{op_name}"
+    return f"_megablocks_76c7de7::{op_name}"
build/torch27-cxx11-cu128-x86_64-linux/megablocks/layers.py CHANGED
@@ -692,8 +692,8 @@ class MegaBlocksMoeMLP(torch.nn.Module):
         moe_normalize_expert_weights = getattr(self.experts, "normalize_expert_weights", None)
         uniform_expert_assignment = getattr(self, "uniform_expert_assignment", False)
 
-        has_parallel = hasattr(self, "expert_parallel_group")
-        expert_parallel_group = torch.distributed.group.WORLD
+        expert_parallel_group = getattr(self, "expert_parallel_group", None)
+        has_parallel = expert_parallel_group is not None and dist.is_initialized() and dist.get_world_size(expert_parallel_group) > 1
         forward_fn = parallel_forward_once if has_parallel else forward_once
 
         sort_end_bit = max(int(torch.ceil(torch.log2(torch.tensor(moe_num_experts)))), 1)