|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%include "libavutil/x86/x86util.asm" |
|
|
|
SECTION_RODATA

; Packed-word constants: one 16-bit value repeated in 8 lanes (16 bytes each).
pw_7f:  times 8 dw 0x007F               ; loaded into m6 by gradfun_filter_line
pw_ff:  times 8 dw 0x00FF               ; loaded into m7 by BLUR_LINE (low-byte mask per word)

SECTION .text
|
|
|
;-----------------------------------------------------------------------
; FILTER_LINE addend
;   Filters one half-register worth of bytes at byte offset r0.
;
;   %1      = register holding the word vector that is added to the
;             scaled pixels before the final >> 7 (the callers pass
;             data loaded from [r5]; presumably a dither row - confirm)
; Expects:  m5 = r4d replicated in every word lane
;           m6 = pw_7f
;           m7 = 0
;           (both gradfun_filter_line variants set these up)
; Reads:    bytes at [r2+r0], words at [r3+r0]
; Writes:   bytes at [r1+r0]
; Modifies: m0, m1, m2 (plus anything PABSW from x86util touches)
;-----------------------------------------------------------------------
%macro FILTER_LINE 1
    movh        m0, [r2+r0]             ; m0 = input bytes
    movh        m1, [r3+r0]             ; m1 = reference words
    punpcklbw   m0, m7                  ; widen bytes to words (m7 = 0)
    punpcklwd   m1, m1                  ; duplicate each word: one reference per byte pair
    psllw       m0, 7                   ; m0 = pix << 7
    psubw       m1, m0                  ; m1 = delta = ref - (pix << 7)
    PABSW       m2, m1                  ; m2 = |delta|
    pmulhuw     m2, m5                  ; m2 = (|delta| * m5) >> 16   (unsigned)
    psubw       m2, m6                  ; m2 -= 0x7F
    pminsw      m2, m7                  ; m2 = min(m2, 0)
    pmullw      m2, m2                  ; m2 = m2 * m2   (low 16 bits)
    paddw       m0, %1                  ; m0 = (pix << 7) + addend
    psllw       m1, 2                   ; m1 = delta << 2
    pmulhw      m1, m2                  ; m1 = ((delta << 2) * m2) >> 16   (signed)
    paddw       m0, m1                  ; apply the correction term
    psraw       m0, 7                   ; back to pixel scale
    packuswb    m0, m0                  ; saturate words to unsigned bytes
    movh   [r1+r0], m0                  ; store the filtered bytes
%endmacro
|
|
|
;-----------------------------------------------------------------------
; gradfun_filter_line, MMXEXT flavour (6 arguments, 6 GPRs: r0-r5)
;   r0  = running byte offset; the loop continues only while it is
;         negative, so it presumably starts at -width with r1-r3
;         pointing past the end of the line (confirm against the caller)
;   r1  = store base        (bytes)
;   r2  = load base         (bytes)
;   r3  = load base         (words)
;   r4d = 16-bit factor, replicated into every word lane of m5
;   r5  = word table: [r5] -> m3, [r5+8] -> m4, applied to alternating
;         4-byte steps
; The body always runs at least once; there is no zero-length check.
;-----------------------------------------------------------------------
INIT_MMX mmxext
cglobal gradfun_filter_line, 6, 6
    pxor        m7, m7                  ; m7 = 0 (unpack helper and clamp bound)
    mova        m6, [pw_7f]
    movh        m5, r4d
    pshufw      m5, m5, 0               ; replicate the low word to all 4 lanes
    mova        m3, [r5]                ; addend for even 4-byte steps
    mova        m4, [r5+8]              ; addend for odd 4-byte steps
.next_pixels:
    FILTER_LINE m3
    add         r0, 4
    jge         .done                   ; offset reached zero or above: line finished
    FILTER_LINE m4
    add         r0, 4
    jl          .next_pixels
.done:
    RET
|
|
|
;-----------------------------------------------------------------------
; gradfun_filter_line, SSSE3 flavour (6 arguments, 6 GPRs, 8 vector regs)
;   r0  = running byte offset; the loop continues while it is negative
;         (presumably starts at -width - confirm against the caller)
;   r1  = store base        (bytes)
;   r2  = load base         (bytes)
;   r3  = load base         (words)
;   r4d = 16-bit factor, replicated into every word lane of m5
;   r5  = word table loaded with mova into m4, so it is expected to be
;         16-byte aligned
; Handles 8 bytes per iteration; the body always runs at least once.
;-----------------------------------------------------------------------
INIT_XMM ssse3
cglobal gradfun_filter_line, 6, 6, 8
    pxor        m7, m7                  ; m7 = 0 (unpack helper and clamp bound)
    mova        m6, [pw_7f]
    mova        m4, [r5]                ; addend used on every step
    movd        m5, r4d
    pshuflw     m5, m5, 0               ; replicate the low word across the low 4 lanes
    punpcklqdq  m5, m5                  ; copy the low qword up: all 8 lanes filled
.next_pixels:
    FILTER_LINE m4
    add         r0, 8
    jl          .next_pixels
    RET
|
|
|
;-----------------------------------------------------------------------
; BLUR_LINE load_insn
;   Emits the function gradfun_blur_line_<load_insn>.
;   %1 is the instruction used for the two loads from [r4+r0] and
;   [r5+r0]; all other memory accesses use mova or a memory operand.
;
;   r0      = running byte offset; the loop continues while it is
;             negative (presumably starts at a negative multiple of 16 -
;             confirm against the caller)
;   r1      = word buffer, read then overwritten
;   r2      = word buffer, read only
;   r3      = word buffer, written only
;   r4, r5  = byte rows
;
; Per 16-bit lane, each iteration computes:
;   sum       = the two bytes of the r4 word + the two bytes of the r5 word
;   new       = sum + word[r2+r0]
;   old       = word[r1+r0]
;   [r1+r0]   = new
;   [r3+r0]   = new - old
;-----------------------------------------------------------------------
%macro BLUR_LINE 1
cglobal gradfun_blur_line_%1, 6, 6, 8
    mova        m7, [pw_ff]             ; low-byte mask for every word
.next_block:
    %1          m0, [r4+r0]             ; first byte row
    %1          m1, [r5+r0]             ; second byte row
    mova        m2, m0
    mova        m3, m1
    psrlw       m0, 8                   ; high byte of each word, first row
    psrlw       m1, 8                   ; high byte of each word, second row
    pand        m2, m7                  ; low byte of each word, first row
    pand        m3, m7                  ; low byte of each word, second row
    paddw       m0, m1
    paddw       m2, m3
    paddw       m0, m2                  ; m0 = sum of the four bytes
    paddw       m0, [r2+r0]             ; new = sum + word[r2+r0]
    mova        m1, [r1+r0]             ; old = previous content of [r1+r0]
    mova   [r1+r0], m0                  ; [r1+r0] = new
    psubw       m0, m1
    mova   [r3+r0], m0                  ; [r3+r0] = new - old
    add         r0, 16
    jl          .next_block
    RET
%endmacro

; Two SSE2 variants that differ only in the load instruction for [r4]/[r5].
INIT_XMM sse2
BLUR_LINE movdqa
BLUR_LINE movdqu
|
|