#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""ResNe(X)t Head helper."""
import torch
import torch.nn as nn
class ResNetBasicHead(nn.Module):
"""
ResNe(X)t 3D head.
This layer performs a fully-connected projection during training, when the
input size is 1x1x1. It performs a convolutional projection during testing
when the input size is larger than 1x1x1. If the inputs are from multiple
different pathways, the inputs will be concatenated after pooling.
"""
    def __init__(
        self,
        dim_in,
        num_classes,
        pool_size,
        dropout_rate=0.0,
        act_func="softmax",
    ):
        """
        The `__init__` method of any subclass should also contain these
        arguments.
        ResNetBasicHead takes p pathways as input where p in [1, infty].

        Args:
            dim_in (list): the list of channel dimensions of the p inputs to
                the ResNetHead.
            num_classes (int): the channel dimension of the output, i.e. the
                number of classes to predict.
            pool_size (list): the list of kernel sizes for the p spatiotemporal
                poolings; each entry is ordered as (temporal pool kernel size,
                spatial pool kernel size, spatial pool kernel size).
            dropout_rate (float): dropout rate. If equal to 0.0, perform no
                dropout.
            act_func (string): activation function to use. 'softmax': applies
                softmax on the output. 'sigmoid': applies sigmoid on the output.
        """
        super(ResNetBasicHead, self).__init__()
        assert (
            len({len(pool_size), len(dim_in)}) == 1
        ), "pathway dimensions are not consistent."
        self.num_pathways = len(pool_size)

        for pathway in range(self.num_pathways):
            if pool_size[pathway] is None:
                avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
            else:
                avg_pool = nn.AvgPool3d(pool_size[pathway], stride=1)
            self.add_module("pathway{}_avgpool".format(pathway), avg_pool)

        if dropout_rate > 0.0:
            self.dropout = nn.Dropout(dropout_rate)
        # Perform FC in a fully convolutional manner. The FC layer will be
        # initialized with a different std compared to convolutional layers.
        self.projection = nn.Linear(sum(dim_in), num_classes, bias=True)

        # Softmax for evaluation and testing.
        if act_func == "softmax":
            self.act = nn.Softmax(dim=4)
        elif act_func == "sigmoid":
            self.act = nn.Sigmoid()
        else:
            raise NotImplementedError(
                "{} is not supported as an activation "
                "function.".format(act_func)
            )

    def forward(self, inputs):
        assert (
            len(inputs) == self.num_pathways
        ), "Input tensor does not contain {} pathway".format(self.num_pathways)
        pool_out = []
        for pathway in range(self.num_pathways):
            m = getattr(self, "pathway{}_avgpool".format(pathway))
            pool_out.append(m(inputs[pathway]))
        # Concatenate the pooled pathways along the channel dimension, so the
        # resulting channel size equals sum(dim_in).
        x = torch.cat(pool_out, 1)
        # (N, C, T, H, W) -> (N, T, H, W, C).
        x = x.permute((0, 2, 3, 4, 1))
        # Perform dropout.
        if hasattr(self, "dropout"):
            x = self.dropout(x)
        x = self.projection(x)
        # Performs fully convolutional inference.
        # if not self.training:
        #     x = x.mean([1, 2, 3])
        x = self.act(x)
        x = x.view(x.shape[0], -1)
        return x
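

# --- Illustrative usage sketch (not part of the original module) ---
# A minimal example of how this head might be wired up for two pathways
# (SlowFast-style). The channel dims, class count, and input shapes below
# are assumptions chosen for demonstration only; pool_size=None entries
# select adaptive average pooling so each pathway is reduced to 1x1x1.
if __name__ == "__main__":
    head = ResNetBasicHead(
        dim_in=[2048, 256],       # assumed slow/fast channel dims
        num_classes=400,          # assumed number of classes
        pool_size=[None, None],   # adaptive pooling per pathway
        dropout_rate=0.5,
        act_func="softmax",
    )
    head.eval()
    # Dummy (N, C, T, H, W) inputs, one tensor per pathway.
    slow = torch.rand(2, 2048, 8, 7, 7)
    fast = torch.rand(2, 256, 32, 7, 7)
    out = head([slow, fast])
    print(out.shape)  # torch.Size([2, 400])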