import torch
from torch import nn


class FaceEncoder(nn.Module):
    """Maps a 96-dim face feature vector to a 16-dim embedding."""

    def __init__(self):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Linear(96, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 16),
        )

        # Kaiming initialization for the linear layers; identity-like init for BatchNorm.
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        return self.encoder(x)


class AudioEncoder(nn.Module):
    """Maps a 12-dim audio feature vector to a 128-dim embedding."""

    def __init__(self):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Linear(12, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 128),
        )

        # Kaiming initialization for the linear layers; identity-like init for BatchNorm.
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        return self.encoder(x)


class FaceDecoder(nn.Module):
    """Smooths a sequence of 144-dim features with a 2-layer GRU ("stabilizer"),
    then decodes every time step to a 40-dim output squashed into [0, 1]."""

    def __init__(self):
        super().__init__()
        h_GRU = 144
        self.stabilizer = nn.GRU(144, h_GRU, 2, batch_first=True, dropout=0.2)

        self.decoder = nn.Sequential(
            nn.Linear(144, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 40),
            nn.Sigmoid(),
        )

        # Kaiming initialization for the linear layers; identity-like init for BatchNorm.
        # The GRU keeps PyTorch's default initialization.
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        # x: (batch, seq_len, 144); the GRU smooths along the time dimension.
        x, _ = self.stabilizer(x)
        # Flatten (batch, seq_len, 144) -> (batch * seq_len, 144) so the BatchNorm1d
        # layers in the decoder see a plain 2-D batch of feature vectors.
        return self.decoder(x.reshape(-1, 144))
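

# Minimal smoke test (a sketch, not part of the original module). Assumption: the
# 16-dim FaceEncoder embedding and the 128-dim AudioEncoder embedding are concatenated
# per frame to form the 144-dim sequence FaceDecoder expects (16 + 128 = 144); the
# batch size B and sequence length T below are arbitrary.
if __name__ == "__main__":
    B, T = 4, 5
    face_enc, audio_enc, decoder = FaceEncoder().eval(), AudioEncoder().eval(), FaceDecoder().eval()

    with torch.no_grad():
        face_emb = face_enc(torch.randn(B * T, 96))    # (B*T, 16)
        audio_emb = audio_enc(torch.randn(B * T, 12))  # (B*T, 128)
        fused = torch.cat([face_emb, audio_emb], dim=1).reshape(B, T, 144)
        out = decoder(fused)                           # (B*T, 40), values in [0, 1]
    print(out.shape)  # expected: torch.Size([20, 40])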