cmahima's picture
Upload 534 files
d4f8fc2 verified
#include <ATen/ATen.h>
#include <vector>
#include "utils/checks.h"
#include "inplace_abn.h"
at::Tensor reduce_sum(at::Tensor x) {
if (x.ndimension() == 2) {
return x.sum(0);
} else {
auto x_view = x.view({x.size(0), x.size(1), -1});
return x_view.sum(-1).sum(0);
}
}
at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
if (x.ndimension() == 2) {
return v;
} else {
std::vector<int64_t> broadcast_size = {1, -1};
for (int64_t i = 2; i < x.ndimension(); ++i)
broadcast_size.push_back(1);
return v.view(broadcast_size);
}
}
int64_t count(at::Tensor x) {
int64_t count = x.size(0);
for (int64_t i = 2; i < x.ndimension(); ++i)
count *= x.size(i);
return count;
}
at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {
if (affine) {
return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z);
} else {
return z;
}
}
std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
auto num = count(x);
auto mean = reduce_sum(x) / num;
auto diff = x - broadcast_to(mean, x);
auto var = reduce_sum(diff.pow(2)) / num;
return {mean, var};
}
at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
bool affine, float eps) {
auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
auto mul = at::rsqrt(var + eps) * gamma;
x.sub_(broadcast_to(mean, x));
x.mul_(broadcast_to(mul, x));
if (affine) x.add_(broadcast_to(bias, x));
return x;
}
std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
bool affine, float eps) {
auto edz = reduce_sum(dz);
auto y = invert_affine(z, weight, bias, affine, eps);
auto eydz = reduce_sum(y * dz);
return {edz, eydz};
}
at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
auto y = invert_affine(z, weight, bias, affine, eps);
auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);
auto num = count(z);
auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
return dx;
}
void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
CHECK_CPU_INPUT(z);
CHECK_CPU_INPUT(dz);
AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
int64_t count = z.numel();
auto *_z = z.data<scalar_t>();
auto *_dz = dz.data<scalar_t>();
for (int64_t i = 0; i < count; ++i) {
if (_z[i] < 0) {
_z[i] *= 1 / slope;
_dz[i] *= slope;
}
}
}));
}
void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
CHECK_CPU_INPUT(z);
CHECK_CPU_INPUT(dz);
AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
int64_t count = z.numel();
auto *_z = z.data<scalar_t>();
auto *_dz = dz.data<scalar_t>();
for (int64_t i = 0; i < count; ++i) {
if (_z[i] < 0) {
_z[i] = log1p(_z[i]);
_dz[i] *= (_z[i] + 1.f);
}
}
}));
}