;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
;* Copyright (c) 2017 Jokyo Images
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Vector constants declared as external symbols; their definitions are not in
; this file. The names suggest packed bytes of 0x80 and packed words of 512
; and 1023 - confirm against the defining file.
cextern pb_80
cextern pw_512
cextern pw_1023

SECTION .text
;-------------------------------------------------------------------------------------------
; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                         int width, int height)
;
; In place, for every byte of every row (byte arithmetic, wraps modulo 256):
;     src_r[i] += src_g[i] - bias
;     src_b[i] += src_g[i] - bias
; where bias is the per-byte value of the external constant pb_80 (presumably
; 0x80 - confirm against its definition). The g plane is only read.
;
; Properties of the code as written that callers must allow for:
;   * Rows are processed in whole mmsize-byte vectors, so up to mmsize - 1
;     bytes beyond width are read from all planes and rewritten in r and b.
;   * All plane accesses use mova; assuming that is the aligned move, the
;     start of every row must be mmsize-aligned - TODO confirm with callers.
;   * Both loops test their exit condition at the bottom, so one vector of one
;     row is processed even if width or height is <= 0.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES 0
; x86-64: 8 arguments loaded into registers, 9 GPRs used (the 9th is x).
; x86-32: 7 arguments loaded into registers, 7 GPRs used; h stays on the stack.
cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    movsxdifnidn wq, wd
    ; Advance each plane pointer by the row width and negate w, so the column
    ; index can count from -width up to 0 and the loop ends on the sign flag.
    add      src_rq, wq
    add      src_gq, wq
    add      src_bq, wq
    neg          wq
%if ARCH_X86_64 == 0
    ; No spare GPR for x: write the negated width back to its argument slot,
    ; rebind the 7th register name to x, and from here on read the width and
    ; the height through their stack slots.
    mov          wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif
    mova         m3, [pb_80]            ; m3 = bias, constant for the whole call

.nextrow:
    mov          xq, wq                 ; x = -width

    .loop:
        mova     m0, [src_rq + xq]
        mova     m1, [src_gq + xq]
        mova     m2, [src_bq + xq]
        psubb    m1, m3                 ; m1 = g - bias
        paddb    m0, m1                 ; r += g - bias
        paddb    m2, m1                 ; b += g - bias
        mova   [src_rq+xq], m0
        mova   [src_bq+xq], m2
        add      xq, mmsize
    jl .loop                            ; continue while x is still negative

    ; Step all three planes to the next row.
    add      src_rq, linesize_rq
    add      src_gq, linesize_gq
    add      src_bq, linesize_bq
    sub      hd, 1
    jg .nextrow                         ; continue while rows remain
    RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES
%endif
;-------------------------------------------------------------------------------------------
; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
;                           ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                           int width, int height)
;
; In place, for every 16-bit sample of every row:
;     src_r[i] = (src_r[i] + src_g[i] - bias) & mask
;     src_b[i] = (src_b[i] + src_g[i] - bias) & mask
; where bias and mask are the per-word values of the external constants pw_512
; and pw_1023 (presumably 512 and 1023 - confirm against their definitions).
; The g plane is only read.
;
; width and the three line sizes are doubled on entry before being used as
; byte offsets, i.e. they are taken to be counts of 16-bit samples.
;
; Properties of the code as written that callers must allow for:
;   * Rows are processed in whole mmsize-byte vectors, so up to mmsize - 2
;     bytes beyond the row width are read from all planes and rewritten in r
;     and b.
;   * All plane accesses use mova; assuming that is the aligned move, the
;     start of every row must be mmsize-aligned - TODO confirm with callers.
;   * Both loops test their exit condition at the bottom, so one vector of one
;     row is processed even if width or height is <= 0.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES10 0
; x86-64: 8 arguments loaded into registers, 9 GPRs used (the 9th is x).
; x86-32: 7 arguments loaded into registers, 7 GPRs used; h stays on the stack.
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    ; Samples -> bytes. On x86-64 the 32-bit shift also clears the upper half
    ; of wq, so no separate extension of the int argument is needed (width is
    ; assumed non-negative).
    shl          wd, 1
    shl linesize_rq, 1
    shl linesize_gq, 1
    shl linesize_bq, 1
    ; Advance each plane pointer by the row size in bytes; w is negated below
    ; so the column index can count from -bytes up to 0.
    add      src_rq, wq
    add      src_gq, wq
    add      src_bq, wq
    mova         m3, [pw_512]           ; m3 = bias
    mova         m4, [pw_1023]          ; m4 = mask
    neg          wq
%if ARCH_X86_64 == 0
    ; No spare GPR for x: write the negated byte width back to its argument
    ; slot, rebind the 7th register name to x, and from here on read the width
    ; and the height through their stack slots.
    mov          wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
%define wq r6m
%define hd r7mp
%endif

.nextrow:
    mov          xq, wq                 ; x = -(row size in bytes)

    .loop:
        mova     m0, [src_rq + xq]
        mova     m1, [src_gq + xq]
        mova     m2, [src_bq + xq]
        psubw    m1, m3                 ; m1 = g - bias
        paddw    m0, m1                 ; r += g - bias
        paddw    m2, m1                 ; b += g - bias
        pand     m0, m4                 ; keep only the bits set in mask
        pand     m2, m4
        mova   [src_rq+xq], m0
        mova   [src_bq+xq], m2
        add      xq, mmsize
    jl .loop                            ; continue while x is still negative

    ; Step all three planes to the next row (line sizes already in bytes).
    add      src_rq, linesize_rq
    add      src_gq, linesize_gq
    add      src_bq, linesize_bq
    sub      hd, 1
    jg .nextrow                         ; continue while rows remain
    RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES10

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES10
%endif