Introducing multi-backends (TRT-LLM, vLLM) support for Text Generation Inference
Is the bias term in the Linear layer applied twice in this case?
def forward(self, x, batch_size):
    local_input = torch.zeros(batch_size, self.local_in_features, device=self.device)
    dist.scatter(local_input, list(x.chunk(self.world_size, dim=1)) if self.rank == 0 else None, src=0)
    # Bias is applied for both rank 0 and rank 1 🤔
    local_output = self.linear(local_input)
    dist.reduce(local_output, dst=0, op=dist.ReduceOp.SUM)
    return local_output
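If I emulate the scatter/reduce in a single process with chunk and a plain sum (hypothetical sizes, world_size = 2 as in the snippet above), the reduced output comes out exactly one extra bias above what a plain nn.Linear gives:

import torch
import torch.nn as nn

torch.manual_seed(0)
batch_size, in_features, out_features, world_size = 4, 8, 3, 2

full = nn.Linear(in_features, out_features)  # reference: bias applied once
x = torch.randn(batch_size, in_features)

# Each "rank" gets a column slice of the weight and the matching input chunk
x_chunks = x.chunk(world_size, dim=1)
w_chunks = full.weight.chunk(world_size, dim=1)

# self.linear(local_input) adds the bias on every rank, so the reduce sums it once per rank
partials = [xc @ wc.t() + full.bias for xc, wc in zip(x_chunks, w_chunks)]
reduced = sum(partials)

print(torch.allclose(reduced, full(x) + full.bias))  # True: one bias too many
print(torch.allclose(reduced, full(x)))              # False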
Should we do this instead?
def forward(self, x, batch_size):
    local_input = torch.zeros(batch_size, self.local_in_features, device=self.device)
    dist.scatter(local_input, list(x.chunk(self.world_size, dim=1)) if self.rank == 0 else None, src=0)
    # Compute the linear transformation without the bias
    local_output = nn.functional.linear(local_input, self.linear.weight, None)
    dist.reduce(local_output, dst=0, op=dist.ReduceOp.SUM)
    # Add the bias only on rank 0, after the reduction
    if self.rank == 0:
        local_output = local_output + self.linear.bias
    return local_output
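The same single-process emulation of the fixed version (same hypothetical sizes) matches nn.Linear up to float rounding, bias-free partial products summed the way dist.reduce would sum them, then the bias added exactly once:

import torch
import torch.nn as nn

torch.manual_seed(0)
batch_size, in_features, out_features, world_size = 4, 8, 3, 2

full = nn.Linear(in_features, out_features)
x = torch.randn(batch_size, in_features)

x_chunks = x.chunk(world_size, dim=1)
w_chunks = full.weight.chunk(world_size, dim=1)

# Bias-free partial matmuls, summed like dist.reduce(..., op=SUM) on rank 0
reduced = sum(nn.functional.linear(xc, wc, None) for xc, wc in zip(x_chunks, w_chunks))
# Bias added exactly once after the reduction, as on rank 0 above
reduced = reduced + full.bias

print(torch.allclose(reduced, full(x)))  # True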
Thanks for the article though, it was helpful.