|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Fixed-point scale used by the peak/average detection: signal values are
 * multiplied by this before being converted to uint for accumulation, and
 * the accumulated totals are divided by it when converted back to float.
 */
#define REFERENCE_WHITE 100.0f

/*
 * Colour-conversion helpers. They are only declared here; their definitions
 * are not part of this section of the source.
 */
extern float3 lrgb2yuv(float3 c);
extern float  lrgb2y(float3 c);
extern float3 yuv2lrgb(float3 yuv);
extern float3 lrgb2lrgb(float3 c);
extern float  get_luma_src(float3 c);
extern float  get_luma_dst(float3 c);
extern float3 ootf(float3 c, float peak);
extern float3 inverse_ootf(float3 c, float peak);
extern float3 get_chroma_sample(float3 c0, float3 c1, float3 c2, float3 c3);
|
|
|
/*
 * Result of the per-frame signal detection.
 * Field order matters: the struct is initialised positionally elsewhere.
 */
struct detection_result {
    float peak;     /* detected (or fallback) peak signal level */
    float average;  /* detected (or fallback) average signal level */
};
|
|
|
/*
 * Rational curve used by hable(): evaluates
 *   (in*(in*a + b*c) + d*e) / (in*(in*a + b) + d*f) - e/f
 * with the fixed coefficients below.
 */
float hable_f(float in) {
    const float a = 0.15f;
    const float b = 0.50f;
    const float c = 0.10f;
    const float d = 0.20f;
    const float e = 0.02f;
    const float f = 0.30f;

    const float numerator   = in * (in * a + b * c) + d * e;
    const float denominator = in * (in * a + b) + d * f;

    return numerator / denominator - e / f;
}
|
|
|
/*
 * Identity tone curve: returns the signal unchanged.
 * peak is accepted only so the signature matches the other tone functions.
 */
float direct(float s, float peak) {
    (void)peak;
    return s;
}
|
|
|
/*
 * Linear tone curve: scales the signal by tone_param and divides by peak.
 * tone_param is not defined in this section (presumably supplied at build
 * time -- confirm against the host code).
 */
float linear(float s, float peak) {
    const float scaled = s * tone_param;
    return scaled / peak;
}
|
|
|
/*
 * Gamma tone curve with exponent 1 / tone_param.
 * Above the 0.05 knee the result is powr(s / peak, 1 / tone_param); at or
 * below the knee the curve is replaced by a straight line through the origin
 * that meets the power curve at the knee.
 */
float gamma(float s, float peak) {
    const float knee     = 0.05f;
    const float exponent = 1.0f / tone_param;

    if (s > knee)
        return powr(s / peak, exponent);

    /* Linear segment: value of the curve at the knee, scaled by s / knee. */
    return s * powr(knee / peak, exponent) / knee;
}
|
|
|
/*
 * Clipping tone curve: multiplies the signal by tone_param and clamps the
 * result to [0, 1]. peak is not used.
 */
float clip(float s, float peak) {
    const float scaled = s * tone_param;
    return clamp(scaled, 0.0f, 1.0f);
}
|
|
|
/*
 * Reinhard-style tone curve: s / (s + tone_param), rescaled by
 * (peak + tone_param) / peak so that an input equal to peak maps to 1.
 */
float reinhard(float s, float peak) {
    const float compressed = s / (s + tone_param);
    const float rescaled   = compressed * (peak + tone_param);
    return rescaled / peak;
}
|
|
|
/*
 * Hable tone curve: the hable_f() response of the signal normalised by the
 * hable_f() response of the peak, so an input equal to peak maps to 1.
 */
float hable(float s, float peak) {
    const float mapped_signal = hable_f(s);
    const float mapped_peak   = hable_f(peak);
    return mapped_signal / mapped_peak;
}
|
|
|
/*
 * Mobius tone curve. Signals at or below the threshold j (= tone_param) are
 * returned unchanged; above it the result is scale * (s + a) / (s + b) with
 * a, b and scale derived from j and peak.
 */
float mobius(float s, float peak) {
    const float j = tone_param;

    /* Below the knee the curve is the identity. */
    if (s <= j)
        return s;

    const float a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
    /* max() keeps the divisor positive when peak is at or below 1. */
    const float b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f);
    const float scale = (b * b + 2.0f * b * j + j * j) / (b - a);

    return scale * (s + a) / (s + b);
}
|
|
|
|
|
|
|
/*
 * Accumulate this work-item's signal into the per-frame statistics stored in
 * util_buf and return the peak/average values to use for tone mapping.
 *
 * Layout of util_buf, in uint elements (as derived by the pointer arithmetic
 * below):
 *
 *   average ring buffer   : DETECTION_FRAMES + 1
 *   peak ring buffer      : DETECTION_FRAMES + 1
 *   work-group counter    : 1
 *   running total of peak : 1
 *   running total of avg  : 1
 *   frame index           : 1
 *   scene frame count     : 1
 *
 * util_buf : global statistics buffer with the layout above
 * sum_wg   : local-memory uint used to sum the signal over the work-group
 * signal   : signal level of the calling work-item
 * peak     : fallback peak, returned when no frames have been accumulated
 *
 * Returns {peak, sdr_avg} when the scene frame count is zero; otherwise the
 * running totals divided by the scene frame count (and REFERENCE_WHITE),
 * with the peak floored at 1.0 and the average floored at 0.25.
 *
 * The function contains work-group barriers, so it must be reached by every
 * work-item of a work-group.
 */
struct detection_result
detect_peak_avg(global uint *util_buf, __local uint *sum_wg,
                float signal, float peak) {
    global uint *avg_buf = util_buf;
    global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1;
    global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1;
    global uint *max_total_p = counter_wg_p + 1;
    global uint *avg_total_p = max_total_p + 1;
    global uint *frame_idx_p = avg_total_p + 1;
    global uint *scene_frame_num_p = frame_idx_p + 1;

    uint frame_idx = *frame_idx_p;
    uint scene_frame_num = *scene_frame_num_p;

    size_t lidx = get_local_id(0);
    size_t lidy = get_local_id(1);
    size_t lsizex = get_local_size(0);
    size_t lsizey = get_local_size(1);
    uint num_wg = get_num_groups(0) * get_num_groups(1);
    struct detection_result r = {peak, sdr_avg};

    /* One work-item clears the shared accumulator before anyone adds to it. */
    if (lidx == 0 && lidy == 0)
        *sum_wg = 0;
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Sum the signal of the whole work-group in fixed point. */
    atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE));
    barrier(CLK_LOCAL_MEM_FENCE);

    /*
     * The first work-item publishes the work-group average: the frame peak is
     * the maximum of these averages, the frame average is their sum (divided
     * by the number of work-groups further below).
     */
    if (lidx == 0 && lidy == 0) {
        uint avg_wg = *sum_wg / (lsizex * lsizey);
        atomic_max(&peak_buf[frame_idx], avg_wg);
        atomic_add(&avg_buf[frame_idx], avg_wg);
    }

    /* Derive the result from the running totals once frames are available. */
    if (scene_frame_num > 0) {
        float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num);
        float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num);
        r.peak = max(1.0f, peak);
        r.average = max(0.25f, avg);
    }

    /*
     * atomic_add returns the previous counter value, so the work-group that
     * observes num_wg - 1 is the last one to arrive; it finalises the frame.
     */
    if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) {
        *counter_wg_p = 0;
        avg_buf[frame_idx] /= num_wg;

        if (scene_threshold > 0.0f) {
            uint cur_max = peak_buf[frame_idx];
            uint cur_avg = avg_buf[frame_idx];
            int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p;

            /*
             * Scene change: the current average deviates from the running
             * average by more than scene_threshold. Drop all history and
             * keep only the current frame's values.
             */
            if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) {
                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
                    avg_buf[i] = 0;
                for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
                    peak_buf[i] = 0;
                *avg_total_p = *max_total_p = 0;
                *scene_frame_num_p = 0;
                avg_buf[frame_idx] = cur_avg;
                peak_buf[frame_idx] = cur_max;
            }
        }
        uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1);

        /* Add the finished frame to the totals and retire the slot reused next. */
        *max_total_p += peak_buf[frame_idx] - peak_buf[next];
        *avg_total_p += avg_buf[frame_idx] - avg_buf[next];

        /* Prepare the next slot and advance the ring index. */
        peak_buf[next] = avg_buf[next] = 0;
        *frame_idx_p = next;
        *scene_frame_num_p = min(*scene_frame_num_p + 1,
                                 (uint)DETECTION_FRAMES);
    }
    return r;
}
|
|
|
/*
 * Tone-map one RGB pixel.
 *
 * rgb     : input colour
 * peak    : signal peak passed on to the tone curve
 * average : signal average used to derive the exposure slope
 *
 * The signal is the largest colour component (floored at 1e-6). It is
 * optionally rescaled by target_peak, scaled by min(1, sdr_avg / average),
 * optionally desaturated, passed through TONE_FUNC and clamped to 1. The
 * colour is then multiplied by the ratio of the mapped signal to the signal
 * captured before the slope was applied.
 *
 * target_peak, sdr_avg, desat_param and TONE_FUNC are not defined in this
 * section (presumably supplied at build time -- confirm against host code).
 */
float3 map_one_pixel_rgb(float3 rgb, float peak, float average) {
    const float max_component = max(rgb.x, max(rgb.y, rgb.z));
    float sig = max(max_component, 1e-6f);

    /* Rescale signal and peak when the target peak exceeds 1. */
    if (target_peak > 1.0f) {
        sig  *= 1.0f / target_peak;
        peak *= 1.0f / target_peak;
    }

    /* Reference for the final colour scaling, taken before the slope. */
    const float sig_ref = sig;

    /* Scale down (never up) according to the measured average. */
    const float slope = min(1.0f, sdr_avg / average);
    sig  *= slope;
    peak *= slope;

    /* Blend colour and signal towards luma for signals above 0.18. */
    if (desat_param > 0.0f) {
        const float luma  = get_luma_dst(rgb);
        const float ratio = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f);
        const float coeff = native_powr(ratio, 10.0f / desat_param);

        rgb = mix(rgb, (float3)luma, (float3)coeff);
        sig = mix(sig, luma * slope, coeff);
    }

    const float mapped = min(TONE_FUNC(sig, peak), 1.0f);

    return rgb * (mapped / sig_ref);
}
|
|
|
/*
 * Convert one YUV sample for tone mapping by applying, in order,
 * yuv2lrgb(), ootf() with the given peak, and lrgb2lrgb().
 */
float3 map_to_dst_space_from_yuv(float3 yuv, float peak) {
    const float3 linear_rgb = yuv2lrgb(yuv);
    return lrgb2lrgb(ootf(linear_rgb, peak));
}
|
|
|
/*
 * Tone-mapping kernel. Each work-item handles a 2x2 block of samples from the
 * first plane (src1/dst1) together with the single sample at (xi, yi) of the
 * second plane (src2/dst2).
 *
 * dst1, dst2 : output images for the first and second plane
 * src1, src2 : input images for the first and second plane
 * util_buf   : global statistics buffer consumed by detect_peak_avg()
 * peak       : source peak, used for the OOTF and as detection fallback
 *
 * The bounds check is applied only to the final writes: detect_peak_avg()
 * contains work-group barriers, so every work-item has to reach it, including
 * those outside the image (their reads are clamped to the edge by the
 * sampler).
 */
__kernel void tonemap(__write_only image2d_t dst1,
                      __read_only  image2d_t src1,
                      __write_only image2d_t dst2,
                      __read_only  image2d_t src2,
                      global uint *util_buf,
                      float peak
                      )
{
    __local uint sum_wg;
    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
                               CLK_ADDRESS_CLAMP_TO_EDGE   |
                               CLK_FILTER_NEAREST);
    int xi = get_global_id(0);
    int yi = get_global_id(1);

    /* Top-left coordinate of this work-item's 2x2 block in the first plane. */
    int x = 2 * xi;
    int y = 2 * yi;

    float y0 = read_imagef(src1, sampler, (int2)(x,     y)).x;
    float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
    float y2 = read_imagef(src1, sampler, (int2)(x,     y + 1)).x;
    float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
    float2 uv = read_imagef(src2, sampler, (int2)(xi,    yi)).xy;

    /* The four samples share the same pair of second-plane components. */
    float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak);
    float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak);
    float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak);
    float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak);

    /* Signal for detection: largest component over the whole 2x2 block. */
    float sig0 = max(c0.x, max(c0.y, c0.z));
    float sig1 = max(c1.x, max(c1.y, c1.z));
    float sig2 = max(c2.x, max(c2.y, c2.z));
    float sig3 = max(c3.x, max(c3.y, c3.z));
    float sig = max(sig0, max(sig1, max(sig2, sig3)));

    struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak);

    c0 = map_one_pixel_rgb(c0, r.peak, r.average);
    c1 = map_one_pixel_rgb(c1, r.peak, r.average);
    c2 = map_one_pixel_rgb(c2, r.peak, r.average);
    c3 = map_one_pixel_rgb(c3, r.peak, r.average);

    c0 = inverse_ootf(c0, target_peak);
    c1 = inverse_ootf(c1, target_peak);
    c2 = inverse_ootf(c2, target_peak);
    c3 = inverse_ootf(c3, target_peak);

    y0 = lrgb2y(c0);
    y1 = lrgb2y(c1);
    y2 = lrgb2y(c2);
    y3 = lrgb2y(c3);
    float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
    float3 chroma = lrgb2yuv(chroma_c);

    /* Only work-items inside the second plane write any output. */
    if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) {
        write_imagef(dst1, (int2)(x,     y),     (float4)(y0, 0.0f, 0.0f, 1.0f));
        write_imagef(dst1, (int2)(x + 1, y),     (float4)(y1, 0.0f, 0.0f, 1.0f));
        write_imagef(dst1, (int2)(x,     y + 1), (float4)(y2, 0.0f, 0.0f, 1.0f));
        write_imagef(dst1, (int2)(x + 1, y + 1), (float4)(y3, 0.0f, 0.0f, 1.0f));
        write_imagef(dst2, (int2)(xi, yi),
                     (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
    }
}
|
|