Spaces:
Running
Running
File size: 4,247 Bytes
c61ccee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import collections
import warnings
from typing import Optional, Sequence, Union
import torch.cuda
__all__ = ["all_reduce", "reduce", "broadcast", "all_gather", "reduce_scatter"]
SUM = 0 # ncclRedOp_t
def is_available(tensors):
if not hasattr(torch._C, "_nccl_all_reduce"):
warnings.warn("PyTorch is not compiled with NCCL support")
return False
devices = set()
for tensor in tensors:
if tensor.is_sparse:
return False
if not tensor.is_contiguous():
return False
if not tensor.is_cuda:
return False
device = tensor.get_device()
if device in devices:
return False
devices.add(device)
return True
def version():
ver = torch._C._nccl_version()
major = ver >> 32
minor = (ver >> 16) & 65535
patch = ver & 65535
suffix = torch._C._nccl_version_suffix().decode("utf-8")
if suffix == "":
return (major, minor, patch)
else:
return (major, minor, patch, suffix)
def unique_id():
return torch._C._nccl_unique_id()
def init_rank(num_ranks, uid, rank):
return torch._C._nccl_init_rank(num_ranks, uid, rank)
def _check_sequence_type(inputs: Union[torch.Tensor, Sequence[torch.Tensor]]) -> None:
if not isinstance(inputs, collections.abc.Container) or isinstance(
inputs, torch.Tensor
):
raise TypeError("Inputs should be a collection of tensors")
def all_reduce(inputs, outputs=None, op=SUM, streams=None, comms=None):
_check_sequence_type(inputs)
if outputs is None:
outputs = inputs
_check_sequence_type(outputs)
torch._C._nccl_all_reduce(inputs, outputs, op, streams, comms)
# `output` used to be `outputs`, taking in a list of tensors. So we have two
# arguments for BC reasons.
def reduce(
inputs: Sequence[torch.Tensor],
output: Optional[Union[torch.Tensor, Sequence[torch.Tensor]]] = None,
root: int = 0,
op: int = SUM,
streams: Optional[Sequence[torch.cuda.Stream]] = None,
comms=None,
*,
outputs: Optional[Sequence[torch.Tensor]] = None,
) -> None:
_check_sequence_type(inputs)
_output: torch.Tensor
if outputs is not None:
if output is not None:
raise ValueError(
"'output' and 'outputs' can not be both specified. 'outputs' is deprecated in "
"favor of 'output', taking in a single output tensor. The signature of reduce is: "
"reduce(inputs, output=None, root=0, op=SUM, streams=None, comms=None)."
)
else:
warnings.warn(
"nccl.reduce with an output tensor list is deprecated. "
"Please specify a single output tensor with argument 'output' instead instead."
)
_output = outputs[root]
elif not isinstance(output, torch.Tensor) and isinstance(
output, collections.abc.Sequence
):
# User called old API with positional arguments of list of output tensors.
warnings.warn(
"nccl.reduce with an output tensor list is deprecated. "
"Please specify a single output tensor."
)
_output = output[root]
else:
_output = inputs[root] if output is None else output
torch._C._nccl_reduce(inputs, _output, root, op, streams, comms)
def broadcast(
inputs: Sequence[torch.Tensor], root: int = 0, streams=None, comms=None
) -> None:
_check_sequence_type(inputs)
torch._C._nccl_broadcast(inputs, root, streams, comms)
def all_gather(
inputs: Sequence[torch.Tensor],
outputs: Sequence[torch.Tensor],
streams=None,
comms=None,
) -> None:
_check_sequence_type(inputs)
_check_sequence_type(outputs)
torch._C._nccl_all_gather(inputs, outputs, streams, comms)
def reduce_scatter(
inputs: Sequence[torch.Tensor],
outputs: Sequence[torch.Tensor],
op: int = SUM,
streams=None,
comms=None,
) -> None:
_check_sequence_type(inputs)
_check_sequence_type(outputs)
torch._C._nccl_reduce_scatter(inputs, outputs, op, streams, comms)
|