import torch
from torch import nn


def _init_linear_bn(module: nn.Module) -> None:
    """Initialize all Linear and BatchNorm1d submodules of *module* in place.

    Linear weights get Kaiming-uniform (fan_in, ReLU gain) initialization;
    BatchNorm1d affine parameters are reset to weight=1, bias=0.

    NOTE(review): the Kaiming/ReLU scheme is also applied to each network's
    final Linear layer, which is not followed by a ReLU — kept as-is to
    preserve the original behavior.
    """
    for m in module.modules():
        if isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm1d):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)


class FaceEncoder(nn.Module):
    """MLP encoder mapping 96-dim face features to a 16-dim embedding.

    Architecture: 96 -> 128 -> 64 -> 16, with BatchNorm1d + ReLU after the
    two hidden Linear layers and a raw Linear output.
    """

    def __init__(self):
        super(FaceEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(96, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 16),
        )
        _init_linear_bn(self)

    def forward(self, x):
        """Encode *x* to a 16-dim embedding.

        Assumes x is (batch, 96) — BatchNorm1d implies a 2-D batch input;
        TODO confirm against callers.
        """
        return self.encoder(x)


class AudioEncoder(nn.Module):
    """MLP encoder mapping 12-dim audio features to a 128-dim embedding.

    Architecture: 12 -> 32 -> 64 -> 128, with BatchNorm1d + ReLU after the
    two hidden Linear layers and a raw Linear output.
    """

    def __init__(self):
        super(AudioEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(12, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 128),
        )
        _init_linear_bn(self)

    def forward(self, x):
        """Encode *x* to a 128-dim embedding.

        Assumes x is (batch, 12) — BatchNorm1d implies a 2-D batch input;
        TODO confirm against callers.
        """
        return self.encoder(x)


class FaceDecoder(nn.Module):
    """GRU-stabilized decoder mapping 144-dim sequences to 40-dim outputs.

    A 2-layer GRU (hidden size 144, dropout 0.2 between layers) smooths the
    input sequence; its per-step outputs are flattened and decoded by an MLP
    144 -> 256 -> 128 -> 40 with a final Sigmoid, so outputs lie in [0, 1].
    """

    def __init__(self):
        super(FaceDecoder, self).__init__()
        h_GRU = 144
        # batch_first=True: GRU expects (batch, seq, 144) input.
        self.stabilizer = nn.GRU(144, h_GRU, 2, batch_first=True, dropout=0.2)
        self.decoder = nn.Sequential(
            nn.Linear(144, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 40),
            nn.Sigmoid(),
        )
        _init_linear_bn(self)

    def forward(self, x):
        """Decode a (batch, seq, 144) sequence to (batch * seq, 40) outputs.

        The GRU output is flattened across the batch and time dimensions
        before decoding, so callers receive one row per time step.
        """
        x, _ = self.stabilizer(x)
        return self.decoder(x.reshape(-1, 144))