File size: 4,964 Bytes
0b32ad6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
"""
Common probing models
Authors:
* Leo 2022
"""
from typing import List
import torch.nn as nn
import s3prl.nn.pooling as pooling
__all__ = [
"FrameLevel",
"UtteranceLevel",
]
class FrameLevel(nn.Module):
    """
    The common frame-to-frame probing model.

    A stack of optional hidden Linear layers (each optionally followed by an
    activation) projecting to a final Linear output layer, applied per frame.

    Args:
        input_size (int): input size
        output_size (int): output size
        hidden_sizes (List[int]): a list of hidden layers' hidden size.
            by default is [256] to project all different input sizes to the same dimension.
            set empty list to use the vanilla single layer linear model
        activation_type (str): the activation class name in :obj:`torch.nn`. Set None to
            disable activation and the model is pure linear. Default: None
        activation_conf (dict): the arguments for initializing the activation class.
            Default: empty dict
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        hidden_sizes: List[int] = None,
        activation_type: str = None,
        activation_conf: dict = None,
    ):
        super().__init__()
        self._indim = input_size
        self._outdim = output_size

        # BUG FIX: the previous `hidden_sizes or [256]` treated an explicit
        # empty list the same as None, so the documented "empty list => vanilla
        # single-layer linear model" behavior was unreachable. Only substitute
        # the default when the argument is actually omitted.
        if hidden_sizes is None:
            hidden_sizes = [256]

        latest_size = input_size
        hidden_layers = []
        # Iterating an empty list is a no-op, so no explicit len() guard needed.
        for size in hidden_sizes:
            hidden_layers.append(nn.Linear(latest_size, size))
            if activation_type is not None:
                # Resolve the activation class by name from torch.nn,
                # e.g. activation_type="ReLU" -> nn.ReLU(**activation_conf)
                hidden_layers.append(
                    getattr(nn, activation_type)(**(activation_conf or {}))
                )
            latest_size = size

        self.hidden_layers = nn.Sequential(*hidden_layers)
        self.final_proj = nn.Linear(latest_size, output_size)

    @property
    def input_size(self) -> int:
        return self._indim

    @property
    def output_size(self) -> int:
        return self._outdim

    def forward(self, x, x_len):
        """
        Args:
            x (torch.FloatTensor): (batch_size, seq_len, input_size)
            x_len (torch.LongTensor): (batch_size, )

        Returns:
            tuple

            1. ys (torch.FloatTensor): (batch_size, seq_len, output_size)
            2. ys_len (torch.LongTensor): (batch_size, )
        """
        ys = self.hidden_layers(x)
        ys = self.final_proj(ys)
        # Frame-to-frame model: sequence lengths are unchanged.
        return ys, x_len
class UtteranceLevel(nn.Module):
    """
    The common utterance-level probing model: hidden Linear layers, a pooling
    module (from :obj:`s3prl.nn.pooling`) that collapses the time axis, and a
    final Linear projection.

    Args:
        input_size (int): input_size
        output_size (int): output_size
        hidden_sizes (List[int]): a list of hidden layers' hidden size.
            by default is [256] to project all different input sizes to the same dimension.
            set empty list to use the vanilla single layer linear model
        activation_type (str): the activation class name in :obj:`torch.nn`. Set None to
            disable activation and the model is pure linear. Default: None
        activation_conf (dict): the arguments for initializing the activation class.
            Default: empty dict
        pooling_type (str): the pooling class name in :obj:`s3prl.nn.pooling`. Default: MeanPooling
        pooling_conf (dict): the arguments for initializing the pooling class.
            Default: empty dict
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        hidden_sizes: List[int] = None,
        activation_type: str = None,
        activation_conf: dict = None,
        pooling_type: str = "MeanPooling",
        pooling_conf: dict = None,
    ):
        super().__init__()
        self._indim = input_size
        self._outdim = output_size

        # BUG FIX: the previous `hidden_sizes or [256]` treated an explicit
        # empty list the same as None, so the documented "empty list => vanilla
        # single-layer linear model" behavior was unreachable. Only substitute
        # the default when the argument is actually omitted.
        if hidden_sizes is None:
            hidden_sizes = [256]

        latest_size = input_size
        hidden_layers = []
        # Iterating an empty list is a no-op, so no explicit len() guard needed.
        for size in hidden_sizes:
            hidden_layers.append(nn.Linear(latest_size, size))
            if activation_type is not None:
                # Resolve the activation class by name from torch.nn,
                # e.g. activation_type="ReLU" -> nn.ReLU(**activation_conf)
                hidden_layers.append(
                    getattr(nn, activation_type)(**(activation_conf or {}))
                )
            latest_size = size

        self.hidden_layers = nn.Sequential(*hidden_layers)

        # Resolve the pooling class by name from s3prl.nn.pooling; the pooled
        # feature size may differ from its input (e.g. statistics pooling),
        # hence reading it back from pooling.output_size.
        pooling_conf = pooling_conf or {}
        self.pooling = getattr(pooling, pooling_type)(latest_size, **pooling_conf)
        latest_size = self.pooling.output_size

        self.final_proj = nn.Linear(latest_size, output_size)

    @property
    def input_size(self) -> int:
        return self._indim

    @property
    def output_size(self) -> int:
        return self._outdim

    def forward(self, x, x_len):
        """
        Args:
            x (torch.FloatTensor): (batch_size, seq_len, input_size)
            x_len (torch.LongTensor): (batch_size, )

        Returns:
            torch.FloatTensor

            (batch_size, output_size)
        """
        x = self.hidden_layers(x)
        # Pooling uses x_len to ignore padded frames when collapsing time.
        x_pooled = self.pooling(x, x_len)
        y = self.final_proj(x_pooled)
        return y
|