%include "libavutil/x86/x86util.asm"

SECTION .text
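;------------------------------------------------------------------------------
; void ff_alac_decorrelate_stereo_sse4(int32_t *buffer[2], int nb_samples,
;                                      int decorr_shift, int decorr_left_weight)
;
; Per-sample sketch of what the loop below computes (parameter names are
; assumed; the scalar reference presumably lives in libavcodec/alacdsp.c):
;     a  = buffer[0][i];
;     b  = buffer[1][i];
;     a -= (b * decorr_left_weight) >> decorr_shift;
;     b += a;
;     buffer[0][i] = b;
;     buffer[1][i] = a;
; Each iteration handles 2*mmsize bytes (8 samples), so the caller is assumed
; to provide 16-byte-aligned buffers padded to a multiple of 8 samples.
;------------------------------------------------------------------------------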
INIT_XMM sse4
%if ARCH_X86_64
cglobal alac_decorrelate_stereo, 2, 5, 8, buf0, len, shift, weight, buf1
%else
cglobal alac_decorrelate_stereo, 2, 3, 8, buf0, len, shift, weight
%define buf1q r2q
%endif
    movd    m6, shiftm
    movd    m7, weightm
    SPLATD  m7                      ; broadcast weight to all four lanes
    shl     lend, 2                 ; nb_samples -> byte count
    mov     buf1q, [buf0q + gprsize]
    mov     buf0q, [buf0q]
    add     buf1q, lenq             ; point past the end, index with negative len
    add     buf0q, lenq
    neg     lenq

align 16
.loop:
    mova    m0, [buf0q + lenq]
    mova    m1, [buf0q + lenq + mmsize]
    mova    m2, [buf1q + lenq]
    mova    m3, [buf1q + lenq + mmsize]
    pmulld  m4, m2, m7              ; b * weight
    pmulld  m5, m3, m7
    psrad   m4, m6                  ; (b * weight) >> shift
    psrad   m5, m6
    psubd   m0, m4                  ; a -= (b * weight) >> shift
    psubd   m1, m5
    paddd   m2, m0                  ; b += a
    paddd   m3, m1
    mova  [buf1q + lenq         ], m0
    mova  [buf1q + lenq + mmsize], m1
    mova  [buf0q + lenq         ], m2
    mova  [buf0q + lenq + mmsize], m3

    add     lenq, mmsize*2
    jl .loop
    RET
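;------------------------------------------------------------------------------
; void ff_alac_append_extra_bits_stereo_sse2(int32_t *buffer[2],
;                                            int32_t *extra_bits_buffer[2],
;                                            int extra_bits, int channels,
;                                            int nb_samples)
;
; Per-sample sketch for both channels (parameter names assumed; 'channels' is
; taken to be 2 and is never read):
;     buffer[ch][i] = (buffer[ch][i] << extra_bits) | extra_bits_buffer[ch][i];
; Same assumptions as above: 8 samples per iteration, 16-byte-aligned and
; suitably padded buffers.
;------------------------------------------------------------------------------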
INIT_XMM sse2
cglobal alac_append_extra_bits_stereo, 2, 5, 5, buf0, exbuf0, buf1, exbuf1, len
    movifnidn lend, lenm
    movd    m4, r2m                 ; extra_bits
    shl     lend, 2                 ; nb_samples -> byte count
    mov     buf1q,   [buf0q + gprsize]
    mov     buf0q,   [buf0q]
    mov     exbuf1q, [exbuf0q + gprsize]
    mov     exbuf0q, [exbuf0q]
    add     buf1q,   lenq
    add     buf0q,   lenq
    add     exbuf1q, lenq
    add     exbuf0q, lenq
    neg     lenq

align 16
.loop:
    mova    m0, [buf0q + lenq]
    mova    m1, [buf0q + lenq + mmsize]
    pslld   m0, m4                  ; sample << extra_bits
    pslld   m1, m4
    mova    m2, [buf1q + lenq]
    mova    m3, [buf1q + lenq + mmsize]
    pslld   m2, m4
    pslld   m3, m4
    por     m0, [exbuf0q + lenq]    ; | extra_bits
    por     m1, [exbuf0q + lenq + mmsize]
    por     m2, [exbuf1q + lenq]
    por     m3, [exbuf1q + lenq + mmsize]
    mova  [buf0q + lenq         ], m0
    mova  [buf0q + lenq + mmsize], m1
    mova  [buf1q + lenq         ], m2
    mova  [buf1q + lenq + mmsize], m3

    add     lenq, mmsize*2
    jl .loop
    RET
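;------------------------------------------------------------------------------
; void ff_alac_append_extra_bits_mono_sse2(int32_t *buffer[2],
;                                          int32_t *extra_bits_buffer[2],
;                                          int extra_bits, int channels,
;                                          int nb_samples)
;
; Mono variant of the routine above: only buffer[0]/extra_bits_buffer[0] are
; touched (prototype assumed to match the stereo one, with 'channels' ignored):
;     buffer[0][i] = (buffer[0][i] << extra_bits) | extra_bits_buffer[0][i];
;------------------------------------------------------------------------------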
%if ARCH_X86_64
cglobal alac_append_extra_bits_mono, 2, 5, 3, buf, exbuf, exbits, ch, len
%else
cglobal alac_append_extra_bits_mono, 2, 3, 3, buf, exbuf, len
%define exbitsm r2m
%endif
    movifnidn lend, r4m             ; nb_samples
    movd    m2, exbitsm             ; extra_bits
    shl     lend, 2                 ; nb_samples -> byte count
    mov     bufq,   [bufq]
    mov     exbufq, [exbufq]
    add     bufq,   lenq
    add     exbufq, lenq
    neg     lenq

align 16
.loop:
    mova    m0, [bufq + lenq]
    mova    m1, [bufq + lenq + mmsize]
    pslld   m0, m2                  ; sample << extra_bits
    pslld   m1, m2
    por     m0, [exbufq + lenq]     ; | extra_bits
    por     m1, [exbufq + lenq + mmsize]
    mova  [bufq + lenq], m0
    mova  [bufq + lenq + mmsize], m1

    add     lenq, mmsize*2
    jl .loop
    RET