|
import time |
|
|
|
import numpy as np |
|
import torch |
|
import torch.nn.functional as F |
|
from scipy import linalg |
|
import math |
|
from data_utils.rotation_conversion import axis_angle_to_matrix, matrix_to_rotation_6d |
|
|
|
import warnings

# Blanket-suppress RuntimeWarnings process-wide (e.g. numerical warnings from
# numpy/scipy during the covariance/sqrtm work in the Frechet distance).
# NOTE(review): this filter also hides RuntimeWarnings from every other
# module — consider scoping it with warnings.catch_warnings() if that matters.
warnings.filterwarnings("ignore", category=RuntimeWarning)
|
|
|
|
|
change_angle = torch.tensor([6.0181e-05, 5.1597e-05, 2.1344e-04, 2.1899e-04]) |
|
class EmbeddingSpaceEvaluator:
    """Accumulate real/generated motion data and compute evaluation metrics.

    Metrics provided:
      - ``get_scores``  : Frechet distance (FGD) and mean per-sample L1
                          distance in the autoencoder feature space.
      - ``get_MAAC``    : mean absolute angle change of selected joint pairs
                          over the stored real joint sequences.
      - ``get_BCscore`` : beat-consistency score between generated motion
                          beats and pushed audio beats.
    """

    def __init__(self, ae, vae, device):
        # Autoencoder whose .extract() embeds pose sequences (push_samples).
        # NOTE(review): `vae` and `device` are accepted for call-site
        # compatibility but are not used by this class.
        self.ae = ae

        # Feature-space accumulators (numpy arrays) -> get_scores().
        self.real_feat_list = []
        self.generated_feat_list = []
        # Joint-position accumulators (torch tensors) -> get_MAAC()/get_BCscore().
        self.real_joints_list = []
        self.generated_joints_list = []
        # 6d-rotation accumulators; no visible method fills or reads them,
        # kept for interface compatibility.
        self.real_6d_list = []
        self.generated_6d_list = []
        # Audio beat times in seconds -> get_BCscore().
        self.audio_beat_list = []

    def reset(self):
        """Clear the feature accumulators.

        NOTE(review): joint/6d/audio lists are deliberately left untouched,
        mirroring the original behavior — confirm this is intended before
        reusing one instance across epochs for MAAC/BC as well.
        """
        self.real_feat_list = []
        self.generated_feat_list = []

    def get_no_of_samples(self):
        """Return the number of pushed (real) feature batches."""
        return len(self.real_feat_list)

    def push_samples(self, generated_poses, real_poses):
        """Embed one batch of real/generated poses and store the features."""
        real_feat, real_poses = self.ae.extract(real_poses)
        generated_feat, generated_poses = self.ae.extract(generated_poses)

        # Flatten to (num_samples, feat_dim) before storing on the CPU.
        real_feat = real_feat.squeeze()
        generated_feat = generated_feat.reshape(
            generated_feat.shape[0] * generated_feat.shape[1], -1)

        self.real_feat_list.append(real_feat.data.cpu().numpy())
        self.generated_feat_list.append(generated_feat.data.cpu().numpy())

    def push_joints(self, generated_poses, real_poses):
        """Store one batch of joint positions for MAAC / BC evaluation."""
        self.real_joints_list.append(real_poses.data.cpu())
        self.generated_joints_list.append(generated_poses.squeeze().data.cpu())

    def push_aud(self, aud):
        """Store one batch of audio beat times (seconds)."""
        self.audio_beat_list.append(aud.squeeze().data.cpu())

    @staticmethod
    def _angle_change(joints):
        """Per-frame absolute change of the four joint-pair angles.

        ``joints`` is (T, J, 3) with J >= 22; returns a (T-1, 4) tensor.
        The stored tensor is NOT modified.
        """
        # Work on a copy: the previous implementation shifted the stored
        # tensor in place, so a second metric call saw already-shifted data
        # and returned different numbers. The RHS is cloned too because the
        # source and destination slices overlap, which makes an in-place
        # copy order-dependent.
        joints = joints.clone()
        joints[:, 15:21] = joints[:, 16:22].clone()
        vec = joints[:, 15:21] - joints[:, 13:19]
        inner_product = torch.einsum('kij,kij->ki', vec[:, 2:], vec[:, :-2])
        # NOTE(review): the vectors are not normalized before acos, so the
        # clamp only guards the domain; kept as-is to preserve the metric.
        inner_product = torch.clamp(inner_product, -1, 1)
        angle = torch.acos(inner_product) / math.pi
        return (angle[1:] - angle[:-1]).abs()

    def get_MAAC(self):
        """Mean absolute angle change over all stored real joint sequences.

        Returns a (4,) tensor, one value per tracked joint-pair angle.
        """
        ang_vel_list = [
            self._angle_change(real_joints).mean(dim=0, keepdim=True)
            for real_joints in self.real_joints_list
        ]
        all_vel = torch.cat(ang_vel_list, dim=0)
        return all_vel.mean(dim=0)

    def get_BCscore(self):
        """Beat-consistency score between generated motion and audio beats.

        A motion beat is a strict local minimum of the normalized angular
        velocity whose drop on either side is at least ``thres`` (frames are
        converted to seconds at 30 fps). Each audio beat contributes
        exp(-min_dt^2 / (2*sigma^2)) for its closest motion beat.
        Returns 0.0 when no beats were accumulated.
        """
        thres = 0.01
        sigma = 0.1
        score_sum = 0
        total_beat = 0
        for joints, audio_beat_time in zip(self.generated_joints_list,
                                           self.audio_beat_list):
            if joints.dim() == 4:
                # Batched input: evaluate the first sequence only.
                joints = joints[0]
            # Normalized angular velocity, (T-1, 4); prepend a zero row so
            # indices align with frame numbers.
            ang_vel = self._angle_change(joints) / change_angle / len(change_angle)
            angle_diff = torch.cat((torch.zeros(1, 4), ang_vel), dim=0)

            seq_score = 0
            for i in range(angle_diff.shape[1]):
                motion_beat_time = []
                for t in range(1, joints.shape[0] - 1):
                    if (angle_diff[t][i] < angle_diff[t - 1][i]
                            and angle_diff[t][i] < angle_diff[t + 1][i]):
                        if (angle_diff[t - 1][i] - angle_diff[t][i] >= thres
                                or angle_diff[t + 1][i] - angle_diff[t][i] >= thres):
                            motion_beat_time.append(float(t) / 30.0)
                if not motion_beat_time:
                    continue
                motion_beat_time = torch.tensor(motion_beat_time)
                joint_score = 0  # renamed from `sum`, which shadowed the builtin
                for audio in audio_beat_time:
                    # Gaussian reward for the closest motion beat.
                    joint_score += np.power(math.e, -(np.power((audio.item() - motion_beat_time), 2)).min() / (2 * sigma * sigma))
                seq_score = seq_score + joint_score
                total_beat = total_beat + len(audio_beat_time)
            score_sum = score_sum + seq_score
        # Guard the empty case instead of raising ZeroDivisionError.
        return score_sum / total_beat if total_beat else 0.0

    def get_scores(self):
        """Return (frechet_distance, mean_l1_feature_distance) between the
        accumulated real and generated feature sets."""
        generated_feats = np.vstack(self.generated_feat_list)
        real_feats = np.vstack(self.real_feat_list)

        def frechet_distance(samples_A, samples_B):
            # Fit a Gaussian (mean, covariance) to each set and compare.
            A_mu = np.mean(samples_A, axis=0)
            A_sigma = np.cov(samples_A, rowvar=False)
            B_mu = np.mean(samples_B, axis=0)
            B_sigma = np.cov(samples_B, rowvar=False)
            try:
                return self.calculate_frechet_distance(A_mu, A_sigma, B_mu, B_sigma)
            except ValueError:
                # Degenerate covariances: report a sentinel instead of crashing.
                return 1e+10

        frechet_dist = frechet_distance(generated_feats, real_feats)

        # Mean per-sample L1 distance. zip pairs rows safely; the original
        # indexed generated_feats with real indices and raised IndexError
        # when fewer generated samples were pushed.
        dists = [np.sum(np.absolute(r - g))
                 for r, g in zip(real_feats, generated_feats)]
        feat_dist = np.mean(dists)

        return frechet_dist, feat_dist

    @staticmethod
    def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
        """ from https://github.com/mseitzer/pytorch-fid/blob/master/fid_score.py """
        """Numpy implementation of the Frechet Distance.
        The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
        and X_2 ~ N(mu_2, C_2) is
                d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
        Stable version by Dougal J. Sutherland.
        Params:
        -- mu1   : Numpy array containing the activations of a layer of the
                   inception net (like returned by the function 'get_predictions')
                   for generated samples.
        -- mu2   : The sample mean over activations, precalculated on an
                   representative data set.
        -- sigma1: The covariance matrix over activations for generated samples.
        -- sigma2: The covariance matrix over activations, precalculated on an
                   representative data set.
        Returns:
        --   : The Frechet Distance.
        """
        mu1 = np.atleast_1d(mu1)
        mu2 = np.atleast_1d(mu2)

        sigma1 = np.atleast_2d(sigma1)
        sigma2 = np.atleast_2d(sigma2)

        assert mu1.shape == mu2.shape, \
            'Training and test mean vectors have different lengths'
        assert sigma1.shape == sigma2.shape, \
            'Training and test covariances have different dimensions'

        diff = mu1 - mu2

        # Product of covariances might be nearly singular; retry with a small
        # diagonal offset when sqrtm produces non-finite entries.
        covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
        if not np.isfinite(covmean).all():
            msg = ('fid calculation produces singular product; '
                   'adding %s to diagonal of cov estimates') % eps
            print(msg)
            offset = np.eye(sigma1.shape[0]) * eps
            covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

        # sqrtm can return tiny imaginary components from numerical error;
        # anything larger than the tolerance signals a real problem.
        if np.iscomplexobj(covmean):
            if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
                m = np.max(np.abs(covmean.imag))
                raise ValueError('Imaginary component {}'.format(m))
            covmean = covmean.real

        tr_covmean = np.trace(covmean)

        return (diff.dot(diff) + np.trace(sigma1) +
                np.trace(sigma2) - 2 * tr_covmean)