|
#version 450 |
|
|
|
#include "types.comp" |
|
|
|
#extension GL_EXT_shader_16bit_storage : require |
|
|
|
// Push-constant parameters of the pooling shader, accessed through `p`.
// Member order and types define the push-constant layout; do not reorder.
layout(push_constant) uniform parameter
{
    // Input extents: IW is the input row length, IH the number of rows per
    // input plane (main() indexes the input as plane * IH * IW + row * IW + col).
    uint IW;
    uint IH;

    // Output extents: OW is the output row length, OH the number of output rows.
    uint OW;
    uint OH;

    // Not referenced by main() in this shader.
    uint OC;

    // Total number of output elements; invocations with an index at or above
    // this value exit without writing.
    uint pelements;

    // Pooling operation selector, compared against OP_POOL_MAX / OP_POOL_AVG.
    uint op;

    // Window extents: main() applies k0 along rows and k1 along columns.
    int k0;
    int k1;

    // Window strides: s0 along rows, s1 along columns.
    int s0;
    int s1;

    // Padding subtracted from the window start: p0 along rows, p1 along columns.
    int p0;
    int p1;
} p;
|
|
|
// Workgroup size along x; one invocation handles one output element.
#define BLOCK_SIZE 512

// Largest finite float magnitude; its negation seeds the max-pool accumulator.
#define FLT_MAX 3.402823466e+38F

// Values of the `op` push constant.
#define OP_POOL_MAX 0u
#define OP_POOL_AVG 1u

layout (local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in;

// Input buffer (read-only).
layout(binding = 0) readonly buffer X
{
    A_TYPE data_a[];
};

// Output buffer (write-only).
layout(binding = 1) writeonly buffer D
{
    D_TYPE data_d[];
};
|
|
|
// 2D pooling entry point. Each invocation computes one output element:
// the flat invocation index is decomposed into (plane, output row, output
// column), the corresponding input window is clipped to the input extents,
// and the window is reduced with either max or average depending on p.op.
// Invocations past p.pelements, or running with an unknown p.op, write nothing.
void main() {
    const uint idx = gl_GlobalInvocationID.x;
    if (idx >= p.pelements) {
        return;
    }

    // Resolve the operation once; it is the same for every tap of the window.
    const bool is_avg = (p.op == OP_POOL_AVG);
    if (!is_avg && p.op != OP_POOL_MAX) {
        return;
    }

    // Decompose the flat output index into plane / row / column.
    const uint O_HW   = p.OW * p.OH;
    const uint nc     = idx / O_HW;
    const uint rem    = idx % O_HW;
    const uint cur_oh = rem / p.OW;
    const uint cur_ow = rem % p.OW;

    // Window origin in input coordinates; negative when it starts in padding.
    const int start_h = int(cur_oh) * p.s0 - p.p0;
    const int start_w = int(cur_ow) * p.s1 - p.p1;

    // Clip the window to the input in SIGNED arithmetic before converting to
    // uint. Mixing int and uint in min() would turn a non-positive window end
    // into a huge unsigned value and make the loop scan the whole plane.
    const uint bh = uint(max(start_h, 0));
    const uint eh = uint(clamp(start_h + p.k0, 0, int(p.IH)));
    const uint bw = uint(max(start_w, 0));
    const uint ew = uint(clamp(start_w + p.k1, 0, int(p.IW)));

    // Average divides by the full window area (k0 * k1), so taps that fall
    // outside the input contribute zero rather than shrinking the divisor.
    const float scale = 1.0 / float(p.k0 * p.k1);

    // Identity element of the selected reduction.
    float res = is_avg ? 0.0 : -FLT_MAX;

    // Offset of the first element of this invocation's input plane.
    const uint plane_base = nc * p.IH * p.IW;

    #pragma unroll
    for (uint i = bh; i < eh; i++) {
        const uint row_base = plane_base + i * p.IW;

        #pragma unroll
        for (uint j = bw; j < ew; j++) {
            // Accumulate in float regardless of the storage types.
            const float cur = float(data_a[row_base + j]);

            if (is_avg) {
                res += cur * scale;
            } else {
                res = max(res, cur);
            }
        }
    }

    // nc * O_HW + cur_oh * OW + cur_ow is exactly idx; convert explicitly to
    // the output storage type.
    data_d[idx] = D_TYPE(res);
}
|
|