|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * function name, align=DEFAULT_ALIGN
 *
 * Starts a global function: switches to .text, applies ".align" with the
 * given value, exports the symbol ASM_PREF+name, marks it as a function
 * symbol and emits its label.
 *
 * As a side effect a nested macro "endfunc" is defined. It emits
 * "jirl $r0, $r1, 0x0" (jump to the address held in $r1, link written to
 * $r0), records the symbol size and then purges itself so that the next
 * "function" invocation can define it again.
 *
 * ASM_PREF and DEFAULT_ALIGN are not defined inside this block.
 */
.macro function name, align=DEFAULT_ALIGN |
|
.macro endfunc |
|
jirl $r0, $r1, 0x0 |
|
.size ASM_PREF\name, . - ASM_PREF\name |
|
/* one-shot: drop endfunc so that the next function can define it again */
.purgem endfunc |
|
.endm |
|
.text ; |
|
.align \align ; |
|
.globl ASM_PREF\name ; |
|
.type ASM_PREF\name, @function ; |
|
ASM_PREF\name: ; |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
/*
 * alloc_stack size, align=0
 *
 * Reserves "size" bytes on the stack and defines the companion macro
 * "clean_stack", which releases them again.
 *
 * align == 0 : sp is lowered by size, clean_stack adds size back.
 * align != 0 : after lowering sp by size, sp is rounded down to a multiple
 *              of align (expected to be a power of two). t7 receives the
 *              total number of bytes taken and must stay untouched until
 *              clean_stack has run.
 *
 * Immediates: size is used by addi.d (signed 12-bit field) and align - 1 by
 * andi (unsigned 12-bit field), so both values have to fit those fields.
 *
 * sp and t7 are register names resolved outside this block.
 *
 * NOTE(review): clean_stack is never purged, so a second alloc_stack in the
 * same assembly unit would try to define it again - confirm this is intended.
 */
.macro alloc_stack size, align=0
.if \align
    .macro clean_stack
        add.d   sp, sp, t7
    .endm
    addi.d  sp, sp, - \size
    /* LoongArch has no "andi.d" mnemonic, the immediate AND is plain andi */
    andi    t7, sp, \align - 1
    sub.d   sp, sp, t7
    addi.d  t7, t7, \size
.else
    .macro clean_stack
        addi.d  sp, sp, \size
    .endm
    addi.d  sp, sp, - \size
.endif
.endm
|
|
|
/*
 * const name, align=DEFAULT_ALIGN
 *
 * Opens a read-only data object: switches to .rodata, applies ".align" with
 * the given value and emits the label "name". Unlike "function", the label
 * is not prefixed with ASM_PREF and is not exported here.
 *
 * Also defines a nested one-shot macro "endconst" that records the object
 * size and then purges itself. The section is not switched back.
 */
.macro const name, align=DEFAULT_ALIGN |
|
.macro endconst |
|
.size \name, . - \name |
|
.purgem endconst |
|
.endm |
|
.section .rodata |
|
.align \align |
|
\name: |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * vdp2.h.bu vd, vj, vk
 * Pairwise dot product of unsigned bytes into halfwords (LSX, 128-bit):
 * vmulwev.h.bu stores the widened products of the even-indexed elements in
 * vd, then vmaddwod.h.bu adds the widened products of the odd-indexed ones.
 * vd is written before vj and vk are read for the second time, so vd must
 * not be the same register as vj or vk.
 */
.macro vdp2.h.bu vd, vj, vk |
|
vmulwev.h.bu \vd, \vj, \vk |
|
vmaddwod.h.bu \vd, \vj, \vk |
|
.endm |
|
|
|
/*
 * vdp2.h.bu.b vd, vj, vk
 * Pairwise dot product into halfwords (LSX, 128-bit) using the mixed-sign
 * ".h.bu.b" forms (unsigned bytes of vj times signed bytes of vk): even
 * products initialise vd, odd products are then accumulated into it.
 * vd is overwritten by the first instruction, so it must not be the same
 * register as vj or vk.
 */
.macro vdp2.h.bu.b vd, vj, vk |
|
vmulwev.h.bu.b \vd, \vj, \vk |
|
vmaddwod.h.bu.b \vd, \vj, \vk |
|
.endm |
|
|
|
/*
 * vdp2.w.h vd, vj, vk
 * Pairwise dot product of signed halfwords into words (LSX, 128-bit):
 * even-indexed products initialise vd, odd-indexed products are added.
 * vd is overwritten by the first instruction, so it must not be the same
 * register as vj or vk.
 */
.macro vdp2.w.h vd, vj, vk |
|
vmulwev.w.h \vd, \vj, \vk |
|
vmaddwod.w.h \vd, \vj, \vk |
|
.endm |
|
|
|
/*
 * xvdp2.h.bu xd, xj, xk
 * LASX (256-bit) counterpart of vdp2.h.bu: xvmulwev.h.bu writes the widened
 * even-indexed unsigned byte products to xd, xvmaddwod.h.bu then adds the
 * odd-indexed ones.
 * xd is overwritten first, so it must not be the same register as xj or xk.
 */
.macro xvdp2.h.bu xd, xj, xk |
|
xvmulwev.h.bu \xd, \xj, \xk |
|
xvmaddwod.h.bu \xd, \xj, \xk |
|
.endm |
|
|
|
/*
 * xvdp2.h.bu.b xd, xj, xk
 * LASX (256-bit) pairwise dot product into halfwords using the mixed-sign
 * ".h.bu.b" forms (unsigned bytes of xj times signed bytes of xk): even
 * products initialise xd, odd products are accumulated.
 * xd is overwritten first, so it must not be the same register as xj or xk.
 */
.macro xvdp2.h.bu.b xd, xj, xk |
|
xvmulwev.h.bu.b \xd, \xj, \xk |
|
xvmaddwod.h.bu.b \xd, \xj, \xk |
|
.endm |
|
|
|
/*
 * xvdp2.w.h xd, xj, xk
 * LASX (256-bit) pairwise dot product of signed halfwords into words: even
 * products initialise xd, odd products are accumulated.
 * xd is overwritten first, so it must not be the same register as xj or xk.
 */
.macro xvdp2.w.h xd, xj, xk |
|
xvmulwev.w.h \xd, \xj, \xk |
|
xvmaddwod.w.h \xd, \xj, \xk |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * vdp2add.h.bu vd, vj, vk
 * Dot product with accumulation (LSX, 128-bit): both instructions are the
 * multiply-add forms, so the widened even-indexed and odd-indexed unsigned
 * byte products are added to the halfwords already held in vd.
 * vd is both input and output. It is modified before the second
 * instruction reads vj and vk, so it must not alias either of them.
 */
.macro vdp2add.h.bu vd, vj, vk |
|
vmaddwev.h.bu \vd, \vj, \vk |
|
vmaddwod.h.bu \vd, \vj, \vk |
|
.endm |
|
|
|
/*
 * vdp2add.h.bu.b vd, vj, vk
 * Dot product with accumulation (LSX, 128-bit) using the mixed-sign
 * ".h.bu.b" forms (unsigned bytes of vj times signed bytes of vk): even and
 * odd products are both added to the halfwords already in vd.
 * vd is both input and output and must not alias vj or vk.
 */
.macro vdp2add.h.bu.b vd, vj, vk |
|
vmaddwev.h.bu.b \vd, \vj, \vk |
|
vmaddwod.h.bu.b \vd, \vj, \vk |
|
.endm |
|
|
|
/*
 * vdp2add.w.h vd, vj, vk
 * Dot product with accumulation (LSX, 128-bit): widened even-indexed and
 * odd-indexed signed halfword products are added to the words in vd.
 * vd is both input and output and must not alias vj or vk.
 */
.macro vdp2add.w.h vd, vj, vk |
|
vmaddwev.w.h \vd, \vj, \vk |
|
vmaddwod.w.h \vd, \vj, \vk |
|
.endm |
|
|
|
/*
 * xvdp2add.h.bu.b xd, xj, xk
 * LASX (256-bit) dot product with accumulation using the mixed-sign
 * ".h.bu.b" forms (unsigned bytes of xj times signed bytes of xk): even and
 * odd products are both added to the halfwords already in xd.
 * xd is both input and output and must not alias xj or xk.
 */
.macro xvdp2add.h.bu.b xd, xj, xk |
|
xvmaddwev.h.bu.b \xd, \xj, \xk |
|
xvmaddwod.h.bu.b \xd, \xj, \xk |
|
.endm |
|
|
|
/*
 * xvdp2add.w.h xd, xj, xk
 * LASX (256-bit) dot product with accumulation: widened even-indexed and
 * odd-indexed signed halfword products are added to the words in xd.
 * xd is both input and output and must not alias xj or xk.
 */
.macro xvdp2add.w.h xd, xj, xk |
|
xvmaddwev.w.h \xd, \xj, \xk |
|
xvmaddwod.w.h \xd, \xj, \xk |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * vclip.h vd, vj, vk, va
 * Clamps the signed halfwords of vj between a lower bound vk and an upper
 * bound va: vd = min(max(vj, vk), va), element by element (LSX, 128-bit).
 * vd is written by vmax.h before va is read, so vd must not be the same
 * register as va.
 */
.macro vclip.h vd, vj, vk, va |
|
vmax.h \vd, \vj, \vk |
|
vmin.h \vd, \vd, \va |
|
.endm |
|
|
|
/*
 * vclip255.w vd, vj
 * Clamps each signed word of vj to the range 0..255 (LSX, 128-bit):
 * vmaxi.w removes negative values, vsat.wu with immediate 7 then saturates
 * the result to an 8-bit unsigned range. vd may be the same register as vj.
 */
.macro vclip255.w vd, vj |
|
vmaxi.w \vd, \vj, 0 |
|
vsat.wu \vd, \vd, 7 |
|
.endm |
|
|
|
/*
 * vclip255.h vd, vj
 * Clamps each signed halfword of vj to the range 0..255 (LSX, 128-bit):
 * vmaxi.h removes negative values, vsat.hu with immediate 7 then saturates
 * the result to an 8-bit unsigned range. vd may be the same register as vj.
 */
.macro vclip255.h vd, vj |
|
vmaxi.h \vd, \vj, 0 |
|
vsat.hu \vd, \vd, 7 |
|
.endm |
|
|
|
/*
 * xvclip.h xd, xj, xk, xa
 * LASX (256-bit) clamp of signed halfwords: xd = min(max(xj, xk), xa),
 * element by element, with xk as lower and xa as upper bound.
 * xd is written by xvmax.h before xa is read, so xd must not be the same
 * register as xa.
 */
.macro xvclip.h xd, xj, xk, xa |
|
xvmax.h \xd, \xj, \xk |
|
xvmin.h \xd, \xd, \xa |
|
.endm |
|
|
|
/*
 * xvclip255.h xd, xj
 * Clamps each signed halfword of xj to the range 0..255 (LASX, 256-bit):
 * xvmaxi.h removes negative values, xvsat.hu with immediate 7 saturates to
 * an 8-bit unsigned range. xd may be the same register as xj.
 */
.macro xvclip255.h xd, xj |
|
xvmaxi.h \xd, \xj, 0 |
|
xvsat.hu \xd, \xd, 7 |
|
.endm |
|
|
|
/*
 * xvclip255.w xd, xj
 * Clamps each signed word of xj to the range 0..255 (LASX, 256-bit):
 * xvmaxi.w removes negative values, xvsat.wu with immediate 7 saturates to
 * an 8-bit unsigned range. xd may be the same register as xj.
 */
.macro xvclip255.w xd, xj |
|
xvmaxi.w \xd, \xj, 0 |
|
xvsat.wu \xd, \xd, 7 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * vstelmx.b vd, rk, ra, si
 * Pre-increment element store: first advances the address register rk by
 * ra, then stores byte element number si of vd at the new address (offset
 * operand fixed to 0). rk is modified and stays advanced afterwards.
 */
.macro vstelmx.b vd, rk, ra, si |
|
add.d \rk, \rk, \ra |
|
vstelm.b \vd, \rk, 0, \si |
|
.endm |
|
|
|
/*
 * vstelmx.h vd, rk, ra, si
 * Pre-increment element store: first advances the address register rk by
 * ra, then stores halfword element number si of vd at the new address
 * (offset operand fixed to 0). rk is modified and stays advanced.
 */
.macro vstelmx.h vd, rk, ra, si |
|
add.d \rk, \rk, \ra |
|
vstelm.h \vd, \rk, 0, \si |
|
.endm |
|
|
|
/*
 * vstelmx.w vd, rk, ra, si
 * Pre-increment element store: first advances the address register rk by
 * ra, then stores word element number si of vd at the new address (offset
 * operand fixed to 0). rk is modified and stays advanced.
 */
.macro vstelmx.w vd, rk, ra, si |
|
add.d \rk, \rk, \ra |
|
vstelm.w \vd, \rk, 0, \si |
|
.endm |
|
|
|
/*
 * vstelmx.d vd, rk, ra, si
 * Pre-increment element store: first advances the address register rk by
 * ra, then stores doubleword element number si of vd at the new address
 * (offset operand fixed to 0). rk is modified and stays advanced.
 */
.macro vstelmx.d vd, rk, ra, si |
|
add.d \rk, \rk, \ra |
|
vstelm.d \vd, \rk, 0, \si |
|
.endm |
|
|
|
/*
 * vmov xd, xj
 * Register-to-register copy of a 128-bit LSX vector, expressed as a bitwise
 * OR of xj with itself.
 */
.macro vmov xd, xj |
|
vor.v \xd, \xj, \xj |
|
.endm |
|
|
|
/*
 * xmov xd, xj
 * Register-to-register copy of a 256-bit LASX vector, expressed as a
 * bitwise OR of xj with itself.
 */
.macro xmov xd, xj |
|
xvor.v \xd, \xj, \xj |
|
.endm |
|
|
|
/*
 * xvstelmx.d xd, rk, ra, si
 * Pre-increment element store for LASX: first advances the address register
 * rk by ra, then stores doubleword element number si of xd at the new
 * address (offset operand fixed to 0). rk is modified and stays advanced.
 */
.macro xvstelmx.d xd, rk, ra, si |
|
add.d \rk, \rk, \ra |
|
xvstelm.d \xd, \rk, 0, \si |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * FLDS_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
 * Loads four single-precision sized values into out0..out3 from src,
 * src + stride, src + stride2 and src + stride3.
 * stride, stride2 and stride3 are registers passed straight to the indexed
 * loads. The names suggest 1x, 2x and 3x the row pitch, but the macro does
 * not compute them - the caller has to. src is not modified.
 */
.macro FLDS_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3 |
|
fld.s \out0, \src, 0 |
|
fldx.s \out1, \src, \stride |
|
fldx.s \out2, \src, \stride2 |
|
fldx.s \out3, \src, \stride3 |
|
.endm |
|
|
|
/*
 * FLDD_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
 * Loads four double-precision sized values into out0..out3 from src,
 * src + stride, src + stride2 and src + stride3.
 * The three offset registers are used as given, the macro does not derive
 * stride2 or stride3 from stride. src is not modified.
 */
.macro FLDD_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3 |
|
fld.d \out0, \src, 0 |
|
fldx.d \out1, \src, \stride |
|
fldx.d \out2, \src, \stride2 |
|
fldx.d \out3, \src, \stride3 |
|
.endm |
|
|
|
/*
 * LSX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
 * Loads four 128-bit LSX vectors into out0..out3 from src, src + stride,
 * src + stride2 and src + stride3.
 * The three offset registers are used as given, the macro does not derive
 * stride2 or stride3 from stride. src is not modified.
 */
.macro LSX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3 |
|
vld \out0, \src, 0 |
|
vldx \out1, \src, \stride |
|
vldx \out2, \src, \stride2 |
|
vldx \out3, \src, \stride3 |
|
.endm |
|
|
|
/*
 * LASX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3
 * Loads four 256-bit LASX vectors into out0..out3 from src, src + stride,
 * src + stride2 and src + stride3.
 * The three offset registers are used as given, the macro does not derive
 * stride2 or stride3 from stride. src is not modified.
 */
.macro LASX_LOADX_4 src, stride, stride2, stride3, out0, out1, out2, out3 |
|
xvld \out0, \src, 0 |
|
xvldx \out1, \src, \stride |
|
xvldx \out2, \src, \stride2 |
|
xvldx \out3, \src, \stride3 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LSX_TRANSPOSE4x4_H in0..in3, out0..out3, tmp0, tmp1
 * Transposes a 4x4 block of halfwords taken from the low 64 bits of
 * in0..in3. Each result row ends up in the low 64 bits of its output
 * register. The upper 64 bits of the outputs are a by-product of the
 * interleaves and carry no additional rows.
 * tmp0 and tmp1 are scratch registers. tmp0 is written before in2 and in3
 * are read, so it must not alias them. out0..out3 must be distinct.
 */
.macro LSX_TRANSPOSE4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \ |
|
tmp0, tmp1 |
|
vilvl.h \tmp0, \in1, \in0 |
|
vilvl.h \tmp1, \in3, \in2 |
|
vilvl.w \out0, \tmp1, \tmp0 |
|
vilvh.w \out2, \tmp1, \tmp0 |
|
vilvh.d \out1, \out0, \out0 |
|
vilvh.d \out3, \out0, \out2 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LSX_TRANSPOSE4x4_W _in0.._in3, _out0.._out3, _tmp0, _tmp1
 * Transposes a 4x4 block of words: _outN receives element N of _in0, _in1,
 * _in2 and _in3, in that order.
 * _tmp0 and _tmp1 are scratch. _out1 and _out3 are used as intermediates:
 * _out1 is written before _in2 and _in3 are read, so _out1 must not alias
 * them, and the temporaries must not alias any input or output.
 */
.macro LSX_TRANSPOSE4x4_W _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3, \ |
|
_tmp0, _tmp1 |
|
|
/* stage 1: interleave words of row pairs (0,1) and (2,3) */
|
vilvl.w \_tmp0, \_in1, \_in0 |
|
vilvh.w \_out1, \_in1, \_in0 |
|
vilvl.w \_tmp1, \_in3, \_in2 |
|
vilvh.w \_out3, \_in3, \_in2 |
|
|
/* stage 2: combine 64-bit halves, _out3 and _out1 are consumed last */
|
vilvl.d \_out0, \_tmp1, \_tmp0 |
|
vilvl.d \_out2, \_out3, \_out1 |
|
vilvh.d \_out3, \_out3, \_out1 |
|
vilvh.d \_out1, \_tmp1, \_tmp0 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LSX_TRANSPOSE8x8_H in0..in7, out0..out7, tmp0..tmp7
 * Transposes an 8x8 block of halfwords: outN receives element N of in0
 * through in7.
 * All inputs are read for the last time before the first output is written,
 * so outputs may reuse input registers. tmp0..tmp7 are scratch and must be
 * distinct from every input and output.
 */
.macro LSX_TRANSPOSE8x8_H in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ |
|
out2, out3, out4, out5, out6, out7, tmp0, tmp1, tmp2, \ |
|
tmp3, tmp4, tmp5, tmp6, tmp7 |
|
vilvl.h \tmp0, \in6, \in4 |
|
vilvl.h \tmp1, \in7, \in5 |
|
vilvl.h \tmp2, \in2, \in0 |
|
vilvl.h \tmp3, \in3, \in1 |
|
|
/* columns 0..3: groups of four rows, tmp6/tmp7 rows 0-3, tmp4/tmp5 rows 4-7 */
|
vilvl.h \tmp4, \tmp1, \tmp0 |
|
vilvh.h \tmp5, \tmp1, \tmp0 |
|
vilvl.h \tmp6, \tmp3, \tmp2 |
|
vilvh.h \tmp7, \tmp3, \tmp2 |
|
|
/* start on the upper halves now, this is the last read of the inputs */
|
vilvh.h \tmp0, \in6, \in4 |
|
vilvh.h \tmp1, \in7, \in5 |
|
vilvh.h \tmp2, \in2, \in0 |
|
vilvh.h \tmp3, \in3, \in1 |
|
|
/* join the two row groups into full output rows 0..3 */
|
vpickev.d \out0, \tmp4, \tmp6 |
|
vpickod.d \out1, \tmp4, \tmp6 |
|
vpickev.d \out2, \tmp5, \tmp7 |
|
vpickod.d \out3, \tmp5, \tmp7 |
|
|
/* same two steps for columns 4..7 */
|
vilvl.h \tmp4, \tmp1, \tmp0 |
|
vilvh.h \tmp5, \tmp1, \tmp0 |
|
vilvl.h \tmp6, \tmp3, \tmp2 |
|
vilvh.h \tmp7, \tmp3, \tmp2 |
|
|
|
vpickev.d \out4, \tmp4, \tmp6 |
|
vpickod.d \out5, \tmp4, \tmp6 |
|
vpickev.d \out6, \tmp5, \tmp7 |
|
vpickod.d \out7, \tmp5, \tmp7 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LASX_TRANSPOSE16X8_B in0..in15, out0..out7, tmp0..tmp7
 * LASX form of the 16-row by 8-column byte transpose: byte, word and
 * doubleword interleaves gather column N of the sixteen inputs into outN.
 * Only the low interleave (xvilvl.b) is applied to the inputs, so just the
 * low 8 bytes of each 128-bit half of every input take part.
 * NOTE(review): the interleaves act on each 128-bit half separately, so this
 * presumably transposes two independent blocks at once - confirm with the
 * callers.
 * All inputs are read by the first eight instructions, before any output is
 * written, so outputs may reuse input registers. out0..out7 serve as
 * intermediates. tmp0..tmp7 must be distinct from inputs and outputs.
 */
.macro LASX_TRANSPOSE16X8_B in0, in1, in2, in3, in4, in5, in6, in7, \ |
|
in8, in9, in10, in11, in12, in13, in14, in15, \ |
|
out0, out1, out2, out3, out4, out5, out6, out7,\ |
|
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 |
|
xvilvl.b \tmp0, \in2, \in0 |
|
xvilvl.b \tmp1, \in3, \in1 |
|
xvilvl.b \tmp2, \in6, \in4 |
|
xvilvl.b \tmp3, \in7, \in5 |
|
xvilvl.b \tmp4, \in10, \in8 |
|
xvilvl.b \tmp5, \in11, \in9 |
|
xvilvl.b \tmp6, \in14, \in12 |
|
xvilvl.b \tmp7, \in15, \in13 |
|
xvilvl.b \out0, \tmp1, \tmp0 |
|
xvilvh.b \out1, \tmp1, \tmp0 |
|
xvilvl.b \out2, \tmp3, \tmp2 |
|
xvilvh.b \out3, \tmp3, \tmp2 |
|
xvilvl.b \out4, \tmp5, \tmp4 |
|
xvilvh.b \out5, \tmp5, \tmp4 |
|
xvilvl.b \out6, \tmp7, \tmp6 |
|
xvilvh.b \out7, \tmp7, \tmp6 |
|
xvilvl.w \tmp0, \out2, \out0 |
|
xvilvh.w \tmp2, \out2, \out0 |
|
xvilvl.w \tmp4, \out3, \out1 |
|
xvilvh.w \tmp6, \out3, \out1 |
|
xvilvl.w \tmp1, \out6, \out4 |
|
xvilvh.w \tmp3, \out6, \out4 |
|
xvilvl.w \tmp5, \out7, \out5 |
|
xvilvh.w \tmp7, \out7, \out5 |
|
xvilvl.d \out0, \tmp1, \tmp0 |
|
xvilvh.d \out1, \tmp1, \tmp0 |
|
xvilvl.d \out2, \tmp3, \tmp2 |
|
xvilvh.d \out3, \tmp3, \tmp2 |
|
xvilvl.d \out4, \tmp5, \tmp4 |
|
xvilvh.d \out5, \tmp5, \tmp4 |
|
xvilvl.d \out6, \tmp7, \tmp6 |
|
xvilvh.d \out7, \tmp7, \tmp6 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LSX_TRANSPOSE16X8_B in0..in15, out0..out7, tmp0..tmp7
 * Transposes a block of 16 rows by 8 byte columns: the low 8 bytes of
 * in0..in15 are the rows, and outN receives byte N of in0 through in15 as
 * one 16-byte vector.
 * All inputs are read by the first eight instructions, before any output is
 * written, so outputs may reuse input registers. out0..out7 serve as
 * intermediates. tmp0..tmp7 must be distinct from inputs and outputs.
 */
.macro LSX_TRANSPOSE16X8_B in0, in1, in2, in3, in4, in5, in6, in7, \ |
|
in8, in9, in10, in11, in12, in13, in14, in15, \ |
|
out0, out1, out2, out3, out4, out5, out6, out7,\ |
|
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 |
|
vilvl.b \tmp0, \in2, \in0 |
|
vilvl.b \tmp1, \in3, \in1 |
|
vilvl.b \tmp2, \in6, \in4 |
|
vilvl.b \tmp3, \in7, \in5 |
|
vilvl.b \tmp4, \in10, \in8 |
|
vilvl.b \tmp5, \in11, \in9 |
|
vilvl.b \tmp6, \in14, \in12 |
|
vilvl.b \tmp7, \in15, \in13 |
|
|
/* 4-row groups: even outN hold columns 0..3, odd outN hold columns 4..7 */
|
vilvl.b \out0, \tmp1, \tmp0 |
|
vilvh.b \out1, \tmp1, \tmp0 |
|
vilvl.b \out2, \tmp3, \tmp2 |
|
vilvh.b \out3, \tmp3, \tmp2 |
|
vilvl.b \out4, \tmp5, \tmp4 |
|
vilvh.b \out5, \tmp5, \tmp4 |
|
vilvl.b \out6, \tmp7, \tmp6 |
|
vilvh.b \out7, \tmp7, \tmp6 |
|
vilvl.w \tmp0, \out2, \out0 |
|
vilvh.w \tmp2, \out2, \out0 |
|
vilvl.w \tmp4, \out3, \out1 |
|
vilvh.w \tmp6, \out3, \out1 |
|
vilvl.w \tmp1, \out6, \out4 |
|
vilvh.w \tmp3, \out6, \out4 |
|
vilvl.w \tmp5, \out7, \out5 |
|
vilvh.w \tmp7, \out7, \out5 |
|
vilvl.d \out0, \tmp1, \tmp0 |
|
vilvh.d \out1, \tmp1, \tmp0 |
|
vilvl.d \out2, \tmp3, \tmp2 |
|
vilvh.d \out3, \tmp3, \tmp2 |
|
vilvl.d \out4, \tmp5, \tmp4 |
|
vilvh.d \out5, \tmp5, \tmp4 |
|
vilvl.d \out6, \tmp7, \tmp6 |
|
vilvh.d \out7, \tmp7, \tmp6 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LASX_TRANSPOSE4x4_H in0..in3, out0..out3, tmp0, tmp1
 * LASX form of the 4x4 halfword transpose, same instruction sequence as the
 * LSX macro with xv-prefixed opcodes.
 * NOTE(review): the interleaves act on each 128-bit half separately, so the
 * result presumably holds one transposed 4x4 block per half, in the low
 * 64 bits of that half - confirm with the callers.
 * tmp0 and tmp1 are scratch. tmp0 is written before in2 and in3 are read,
 * so it must not alias them. out0..out3 must be distinct.
 */
.macro LASX_TRANSPOSE4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \ |
|
tmp0, tmp1 |
|
xvilvl.h \tmp0, \in1, \in0 |
|
xvilvl.h \tmp1, \in3, \in2 |
|
xvilvl.w \out0, \tmp1, \tmp0 |
|
xvilvh.w \out2, \tmp1, \tmp0 |
|
xvilvh.d \out1, \out0, \out0 |
|
xvilvh.d \out3, \out0, \out2 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LASX_TRANSPOSE4x8_H in0..in3, out0..out3, tmp0, tmp1
 * Interleaves the halfwords of four LASX vectors so that elements with the
 * same index from in0, in1, in2, in3 become adjacent, then spreads the
 * result over out0..out3. Each output has its 64-bit group duplicated in
 * both halves of a 128-bit lane, because the final xvilvl.d and xvilvh.d
 * use the same register for both sources.
 * Only xvilvl.h is applied to the inputs, so only the low 64 bits of each
 * 128-bit half of the inputs are consumed.
 * tmp0 is written before in1 and in3 are read and must not alias them.
 * out2 and out3 are used as intermediates.
 */
.macro LASX_TRANSPOSE4x8_H in0, in1, in2, in3, out0, out1, out2, out3, \ |
|
tmp0, tmp1 |
|
xvilvl.h \tmp0, \in2, \in0 |
|
xvilvl.h \tmp1, \in3, \in1 |
|
xvilvl.h \out2, \tmp1, \tmp0 |
|
xvilvh.h \out3, \tmp1, \tmp0 |
|
|
/* split each intermediate into its two 64-bit groups, duplicated per lane */
|
xvilvl.d \out0, \out2, \out2 |
|
xvilvh.d \out1, \out2, \out2 |
|
xvilvl.d \out2, \out3, \out3 |
|
xvilvh.d \out3, \out3, \out3 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LASX_TRANSPOSE8x8_H in0..in7, out0..out7, tmp0..tmp7
 * LASX form of the 8x8 halfword transpose, same instruction sequence as the
 * LSX macro with xv-prefixed opcodes.
 * NOTE(review): the interleave and pick instructions act on each 128-bit
 * half separately, so this presumably transposes two independent 8x8 blocks
 * (one per half) - confirm with the callers.
 * All inputs are read for the last time before the first output is written,
 * so outputs may reuse input registers. tmp0..tmp7 are scratch and must be
 * distinct from every input and output.
 */
.macro LASX_TRANSPOSE8x8_H in0, in1, in2, in3, in4, in5, in6, in7, \ |
|
out0, out1, out2, out3, out4, out5, out6, out7, \ |
|
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7 |
|
xvilvl.h \tmp0, \in6, \in4 |
|
xvilvl.h \tmp1, \in7, \in5 |
|
xvilvl.h \tmp2, \in2, \in0 |
|
xvilvl.h \tmp3, \in3, \in1 |
|
|
|
xvilvl.h \tmp4, \tmp1, \tmp0 |
|
xvilvh.h \tmp5, \tmp1, \tmp0 |
|
xvilvl.h \tmp6, \tmp3, \tmp2 |
|
xvilvh.h \tmp7, \tmp3, \tmp2 |
|
|
/* upper halves of the inputs, this is their last read */
|
xvilvh.h \tmp0, \in6, \in4 |
|
xvilvh.h \tmp1, \in7, \in5 |
|
xvilvh.h \tmp2, \in2, \in0 |
|
xvilvh.h \tmp3, \in3, \in1 |
|
|
|
xvpickev.d \out0, \tmp4, \tmp6 |
|
xvpickod.d \out1, \tmp4, \tmp6 |
|
xvpickev.d \out2, \tmp5, \tmp7 |
|
xvpickod.d \out3, \tmp5, \tmp7 |
|
|
|
xvilvl.h \tmp4, \tmp1, \tmp0 |
|
xvilvh.h \tmp5, \tmp1, \tmp0 |
|
xvilvl.h \tmp6, \tmp3, \tmp2 |
|
xvilvh.h \tmp7, \tmp3, \tmp2 |
|
|
|
xvpickev.d \out4, \tmp4, \tmp6 |
|
xvpickod.d \out5, \tmp4, \tmp6 |
|
xvpickev.d \out6, \tmp5, \tmp7 |
|
xvpickod.d \out7, \tmp5, \tmp7 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LASX_TRANSPOSE2x4x4_H in0..in3, out0..out3, tmp0, tmp1, tmp2
 * Halfword transpose built from interleaves with the operand order
 * (inA, inB) swapped relative to the other transpose macros in this file.
 * NOTE(review): tracing the interleaves within one 128-bit half suggests
 * out2 gets columns 0 and 4, out0 columns 1 and 5, out3 columns 2 and 6 and
 * out1 columns 3 and 7, each with rows ordered in1, in0, in3, in2 - confirm
 * the intended layout with the callers.
 * tmp0..tmp2 are scratch. tmp1 and out1 are written before later inputs are
 * read: tmp1 must not alias any input and out1 must not alias in2 or in3.
 * out1, out2 and out3 are used as intermediates.
 */
.macro LASX_TRANSPOSE2x4x4_H in0, in1, in2, in3, out0, out1, out2, out3, \ |
|
tmp0, tmp1, tmp2 |
|
xvilvh.h \tmp1, \in0, \in1 |
|
xvilvl.h \out1, \in0, \in1 |
|
xvilvh.h \tmp0, \in2, \in3 |
|
xvilvl.h \out3, \in2, \in3 |
|
|
/* word interleave of the low-half results */
|
xvilvh.w \tmp2, \out3, \out1 |
|
xvilvl.w \out3, \out3, \out1 |
|
|
/* word interleave of the high-half results */
|
xvilvl.w \out2, \tmp0, \tmp1 |
|
xvilvh.w \tmp1, \tmp0, \tmp1 |
|
|
/* final 64-bit combine into the four outputs */
|
xvilvh.d \out0, \out2, \out3 |
|
xvilvl.d \out2, \out2, \out3 |
|
xvilvh.d \out1, \tmp1, \tmp2 |
|
xvilvl.d \out3, \tmp1, \tmp2 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LASX_TRANSPOSE4x4_W _in0.._in3, _out0.._out3, _tmp0, _tmp1
 * LASX form of the 4x4 word transpose, same instruction sequence as the
 * LSX macro with xv-prefixed opcodes.
 * NOTE(review): the interleaves act on each 128-bit half separately, so this
 * presumably transposes one 4x4 word block per half - confirm with callers.
 * _tmp0 and _tmp1 are scratch. _out1 is written before _in2 and _in3 are
 * read, so _out1 must not alias them. _out1 and _out3 are intermediates.
 */
.macro LASX_TRANSPOSE4x4_W _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3, \ |
|
_tmp0, _tmp1 |
|
|
/* stage 1: interleave words of row pairs (0,1) and (2,3) */
|
xvilvl.w \_tmp0, \_in1, \_in0 |
|
xvilvh.w \_out1, \_in1, \_in0 |
|
xvilvl.w \_tmp1, \_in3, \_in2 |
|
xvilvh.w \_out3, \_in3, \_in2 |
|
|
/* stage 2: combine 64-bit halves, _out3 and _out1 are consumed last */
|
xvilvl.d \_out0, \_tmp1, \_tmp0 |
|
xvilvl.d \_out2, \_out3, \_out1 |
|
xvilvh.d \_out3, \_out3, \_out1 |
|
xvilvh.d \_out1, \_tmp1, \_tmp0 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LASX_TRANSPOSE8x8_W _in0.._in7, _out0.._out7, _tmp0.._tmp3
 * Transposes an 8x8 block of words held in eight LASX vectors.
 * Step 1 transposes rows 0-3 into _out0.._out3 and rows 4-7 into
 * _out4.._out7 with word interleaves. Step 2 uses xvpermi.q to exchange
 * 128-bit halves between _outN and _outN+4, after saving _out0.._out3 in
 * the temporaries with the xmov macro, which is defined outside this block.
 * _out0.._out3 are written before _in4.._in7 are read, so they must not
 * alias those inputs. _tmp0.._tmp3 must be distinct from inputs and outputs.
 * NOTE(review): the xvpermi.q selector meanings (0x02, 0x31) should be
 * checked against the LASX reference before changing them.
 */
.macro LASX_TRANSPOSE8x8_W _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7,\ |
|
_out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7,\ |
|
_tmp0, _tmp1, _tmp2, _tmp3 |
|
xvilvl.w \_tmp0, \_in2, \_in0 |
|
xvilvl.w \_tmp1, \_in3, \_in1 |
|
xvilvh.w \_tmp2, \_in2, \_in0 |
|
xvilvh.w \_tmp3, \_in3, \_in1 |
|
xvilvl.w \_out0, \_tmp1, \_tmp0 |
|
xvilvh.w \_out1, \_tmp1, \_tmp0 |
|
xvilvl.w \_out2, \_tmp3, \_tmp2 |
|
xvilvh.w \_out3, \_tmp3, \_tmp2 |
|
|
/* same in-lane transpose for rows 4..7 */
|
xvilvl.w \_tmp0, \_in6, \_in4 |
|
xvilvl.w \_tmp1, \_in7, \_in5 |
|
xvilvh.w \_tmp2, \_in6, \_in4 |
|
xvilvh.w \_tmp3, \_in7, \_in5 |
|
xvilvl.w \_out4, \_tmp1, \_tmp0 |
|
xvilvh.w \_out5, \_tmp1, \_tmp0 |
|
xvilvl.w \_out6, \_tmp3, \_tmp2 |
|
xvilvh.w \_out7, \_tmp3, \_tmp2 |
|
|
/* keep copies of _out0.._out3, they are overwritten before _out4.._out7 need them */
|
xmov \_tmp0, \_out0 |
|
xmov \_tmp1, \_out1 |
|
xmov \_tmp2, \_out2 |
|
xmov \_tmp3, \_out3 |
|
xvpermi.q \_out0, \_out4, 0x02 |
|
xvpermi.q \_out1, \_out5, 0x02 |
|
xvpermi.q \_out2, \_out6, 0x02 |
|
xvpermi.q \_out3, \_out7, 0x02 |
|
xvpermi.q \_out4, \_tmp0, 0x31 |
|
xvpermi.q \_out5, \_tmp1, 0x31 |
|
xvpermi.q \_out6, \_tmp2, 0x31 |
|
xvpermi.q \_out7, \_tmp3, 0x31 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LASX_TRANSPOSE4x4_D _in0.._in3, _out0.._out3, _tmp0, _tmp1
 * Transposes a 4x4 block of doublewords held in four LASX vectors.
 * Doubleword interleaves pair up rows (0,1) and (2,3) inside each 128-bit
 * half, then xvpermi.q moves 128-bit halves between registers to assemble
 * the final rows.
 * _out1 is written before _in2 and _in3 are read, so it must not alias
 * them. _tmp0 and _tmp1 are scratch and must not alias inputs or outputs.
 * NOTE(review): the xvpermi.q selector meanings (0x02, 0x31) should be
 * checked against the LASX reference before changing them.
 */
.macro LASX_TRANSPOSE4x4_D _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3, \ |
|
_tmp0, _tmp1 |
|
xvilvl.d \_tmp0, \_in1, \_in0 |
|
xvilvh.d \_out1, \_in1, \_in0 |
|
xvilvh.d \_tmp1, \_in3, \_in2 |
|
xvilvl.d \_out2, \_in3, \_in2 |
|
|
/* plain register copies, xvpermi.q below reads its destination as a source */
|
xvor.v \_out0, \_tmp0, \_tmp0 |
|
xvor.v \_out3, \_tmp1, \_tmp1 |
|
|
/* exchange 128-bit halves to finish the transpose */
|
xvpermi.q \_out0, \_out2, 0x02 |
|
xvpermi.q \_out2, \_tmp0, 0x31 |
|
xvpermi.q \_out3, \_out1, 0x31 |
|
xvpermi.q \_out1, \_tmp1, 0x02 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LSX_BUTTERFLY_4_B _in0.._in3, _out0.._out3
 * 4-point butterfly on byte elements (LSX, non-saturating add and sub):
 *   _out0 = _in0 + _in3    _out1 = _in1 + _in2
 *   _out2 = _in1 - _in2    _out3 = _in0 - _in3
 * The inputs are read again after the sums are written, so _out0 and _out1
 * must not be any input register and _out2 must not be _in0 or _in3.
 */
.macro LSX_BUTTERFLY_4_B _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
|
vadd.b \_out0, \_in0, \_in3 |
|
vadd.b \_out1, \_in1, \_in2 |
|
vsub.b \_out2, \_in1, \_in2 |
|
vsub.b \_out3, \_in0, \_in3 |
|
.endm |
|
/*
 * LSX_BUTTERFLY_4_H _in0.._in3, _out0.._out3
 * 4-point butterfly on halfword elements (LSX, non-saturating):
 *   _out0 = _in0 + _in3    _out1 = _in1 + _in2
 *   _out2 = _in1 - _in2    _out3 = _in0 - _in3
 * _out0 and _out1 must not be any input register and _out2 must not be
 * _in0 or _in3, because those inputs are read again afterwards.
 */
.macro LSX_BUTTERFLY_4_H _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
|
vadd.h \_out0, \_in0, \_in3 |
|
vadd.h \_out1, \_in1, \_in2 |
|
vsub.h \_out2, \_in1, \_in2 |
|
vsub.h \_out3, \_in0, \_in3 |
|
.endm |
|
/*
 * LSX_BUTTERFLY_4_W _in0.._in3, _out0.._out3
 * 4-point butterfly on word elements (LSX, non-saturating):
 *   _out0 = _in0 + _in3    _out1 = _in1 + _in2
 *   _out2 = _in1 - _in2    _out3 = _in0 - _in3
 * _out0 and _out1 must not be any input register and _out2 must not be
 * _in0 or _in3, because those inputs are read again afterwards.
 */
.macro LSX_BUTTERFLY_4_W _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
|
vadd.w \_out0, \_in0, \_in3 |
|
vadd.w \_out1, \_in1, \_in2 |
|
vsub.w \_out2, \_in1, \_in2 |
|
vsub.w \_out3, \_in0, \_in3 |
|
.endm |
|
/*
 * LSX_BUTTERFLY_4_D _in0.._in3, _out0.._out3
 * 4-point butterfly on doubleword elements (LSX, non-saturating):
 *   _out0 = _in0 + _in3    _out1 = _in1 + _in2
 *   _out2 = _in1 - _in2    _out3 = _in0 - _in3
 * _out0 and _out1 must not be any input register and _out2 must not be
 * _in0 or _in3, because those inputs are read again afterwards.
 */
.macro LSX_BUTTERFLY_4_D _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
|
vadd.d \_out0, \_in0, \_in3 |
|
vadd.d \_out1, \_in1, \_in2 |
|
vsub.d \_out2, \_in1, \_in2 |
|
vsub.d \_out3, \_in0, \_in3 |
|
.endm |
|
|
|
/*
 * LASX_BUTTERFLY_4_B _in0.._in3, _out0.._out3
 * 4-point butterfly on byte elements (LASX, 256-bit, non-saturating):
 *   _out0 = _in0 + _in3    _out1 = _in1 + _in2
 *   _out2 = _in1 - _in2    _out3 = _in0 - _in3
 * _out0 and _out1 must not be any input register and _out2 must not be
 * _in0 or _in3, because those inputs are read again afterwards.
 */
.macro LASX_BUTTERFLY_4_B _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
|
xvadd.b \_out0, \_in0, \_in3 |
|
xvadd.b \_out1, \_in1, \_in2 |
|
xvsub.b \_out2, \_in1, \_in2 |
|
xvsub.b \_out3, \_in0, \_in3 |
|
.endm |
|
/*
 * LASX_BUTTERFLY_4_H _in0.._in3, _out0.._out3
 * 4-point butterfly on halfword elements (LASX, 256-bit, non-saturating):
 *   _out0 = _in0 + _in3    _out1 = _in1 + _in2
 *   _out2 = _in1 - _in2    _out3 = _in0 - _in3
 * _out0 and _out1 must not be any input register and _out2 must not be
 * _in0 or _in3, because those inputs are read again afterwards.
 */
.macro LASX_BUTTERFLY_4_H _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
|
xvadd.h \_out0, \_in0, \_in3 |
|
xvadd.h \_out1, \_in1, \_in2 |
|
xvsub.h \_out2, \_in1, \_in2 |
|
xvsub.h \_out3, \_in0, \_in3 |
|
.endm |
|
/*
 * LASX_BUTTERFLY_4_W _in0.._in3, _out0.._out3
 * 4-point butterfly on word elements (LASX, 256-bit, non-saturating):
 *   _out0 = _in0 + _in3    _out1 = _in1 + _in2
 *   _out2 = _in1 - _in2    _out3 = _in0 - _in3
 * _out0 and _out1 must not be any input register and _out2 must not be
 * _in0 or _in3, because those inputs are read again afterwards.
 */
.macro LASX_BUTTERFLY_4_W _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
|
xvadd.w \_out0, \_in0, \_in3 |
|
xvadd.w \_out1, \_in1, \_in2 |
|
xvsub.w \_out2, \_in1, \_in2 |
|
xvsub.w \_out3, \_in0, \_in3 |
|
.endm |
|
/*
 * LASX_BUTTERFLY_4_D _in0.._in3, _out0.._out3
 * 4-point butterfly on doubleword elements (LASX, 256-bit, non-saturating):
 *   _out0 = _in0 + _in3    _out1 = _in1 + _in2
 *   _out2 = _in1 - _in2    _out3 = _in0 - _in3
 * _out0 and _out1 must not be any input register and _out2 must not be
 * _in0 or _in3, because those inputs are read again afterwards.
 */
.macro LASX_BUTTERFLY_4_D _in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3 |
|
xvadd.d \_out0, \_in0, \_in3 |
|
xvadd.d \_out1, \_in1, \_in2 |
|
xvsub.d \_out2, \_in1, \_in2 |
|
xvsub.d \_out3, \_in0, \_in3 |
|
.endm |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * LSX_BUTTERFLY_8_B _in0.._in7, _out0.._out7
 * 8-point butterfly on byte elements (LSX, non-saturating):
 *   _outK   = _inK + _in(7-K)   for K = 0..3
 *   _out(7-K) = _inK - _in(7-K) for K = 0..3
 * Every input is read again by the subtraction half, so _out0.._out3 must
 * not be input registers, and _out4.._out6 must not alias an input that a
 * later subtraction still reads.
 */
.macro LSX_BUTTERFLY_8_B _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
|
_out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
|
vadd.b \_out0, \_in0, \_in7 |
|
vadd.b \_out1, \_in1, \_in6 |
|
vadd.b \_out2, \_in2, \_in5 |
|
vadd.b \_out3, \_in3, \_in4 |
|
vsub.b \_out4, \_in3, \_in4 |
|
vsub.b \_out5, \_in2, \_in5 |
|
vsub.b \_out6, \_in1, \_in6 |
|
vsub.b \_out7, \_in0, \_in7 |
|
.endm |
|
|
|
/*
 * LSX_BUTTERFLY_8_H _in0.._in7, _out0.._out7
 * 8-point butterfly on halfword elements (LSX, non-saturating):
 *   _outK   = _inK + _in(7-K)   for K = 0..3
 *   _out(7-K) = _inK - _in(7-K) for K = 0..3
 * Every input is read again by the subtraction half, so _out0.._out3 must
 * not be input registers, and _out4.._out6 must not alias an input that a
 * later subtraction still reads.
 */
.macro LSX_BUTTERFLY_8_H _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
|
_out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
|
vadd.h \_out0, \_in0, \_in7 |
|
vadd.h \_out1, \_in1, \_in6 |
|
vadd.h \_out2, \_in2, \_in5 |
|
vadd.h \_out3, \_in3, \_in4 |
|
vsub.h \_out4, \_in3, \_in4 |
|
vsub.h \_out5, \_in2, \_in5 |
|
vsub.h \_out6, \_in1, \_in6 |
|
vsub.h \_out7, \_in0, \_in7 |
|
.endm |
|
|
|
/*
 * LSX_BUTTERFLY_8_W _in0.._in7, _out0.._out7
 * 8-point butterfly on word elements (LSX, non-saturating):
 *   _outK   = _inK + _in(7-K)   for K = 0..3
 *   _out(7-K) = _inK - _in(7-K) for K = 0..3
 * Every input is read again by the subtraction half, so _out0.._out3 must
 * not be input registers, and _out4.._out6 must not alias an input that a
 * later subtraction still reads.
 */
.macro LSX_BUTTERFLY_8_W _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
|
_out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
|
vadd.w \_out0, \_in0, \_in7 |
|
vadd.w \_out1, \_in1, \_in6 |
|
vadd.w \_out2, \_in2, \_in5 |
|
vadd.w \_out3, \_in3, \_in4 |
|
vsub.w \_out4, \_in3, \_in4 |
|
vsub.w \_out5, \_in2, \_in5 |
|
vsub.w \_out6, \_in1, \_in6 |
|
vsub.w \_out7, \_in0, \_in7 |
|
.endm |
|
|
|
/*
 * LSX_BUTTERFLY_8_D _in0.._in7, _out0.._out7
 * 8-point butterfly on doubleword elements (LSX, non-saturating):
 *   _outK   = _inK + _in(7-K)   for K = 0..3
 *   _out(7-K) = _inK - _in(7-K) for K = 0..3
 * Every input is read again by the subtraction half, so _out0.._out3 must
 * not be input registers, and _out4.._out6 must not alias an input that a
 * later subtraction still reads.
 */
.macro LSX_BUTTERFLY_8_D _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
|
_out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
|
vadd.d \_out0, \_in0, \_in7 |
|
vadd.d \_out1, \_in1, \_in6 |
|
vadd.d \_out2, \_in2, \_in5 |
|
vadd.d \_out3, \_in3, \_in4 |
|
vsub.d \_out4, \_in3, \_in4 |
|
vsub.d \_out5, \_in2, \_in5 |
|
vsub.d \_out6, \_in1, \_in6 |
|
vsub.d \_out7, \_in0, \_in7 |
|
.endm |
|
|
|
/*
 * LASX_BUTTERFLY_8_B _in0.._in7, _out0.._out7
 * 8-point butterfly on byte elements (LASX, 256-bit, non-saturating):
 *   _outK   = _inK + _in(7-K)   for K = 0..3
 *   _out(7-K) = _inK - _in(7-K) for K = 0..3
 * Every input is read again by the subtraction half, so _out0.._out3 must
 * not be input registers, and _out4.._out6 must not alias an input that a
 * later subtraction still reads.
 */
.macro LASX_BUTTERFLY_8_B _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
|
_out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
|
xvadd.b \_out0, \_in0, \_in7 |
|
xvadd.b \_out1, \_in1, \_in6 |
|
xvadd.b \_out2, \_in2, \_in5 |
|
xvadd.b \_out3, \_in3, \_in4 |
|
xvsub.b \_out4, \_in3, \_in4 |
|
xvsub.b \_out5, \_in2, \_in5 |
|
xvsub.b \_out6, \_in1, \_in6 |
|
xvsub.b \_out7, \_in0, \_in7 |
|
.endm |
|
|
|
/*
 * LASX_BUTTERFLY_8_H _in0.._in7, _out0.._out7
 * 8-point butterfly on halfword elements (LASX, 256-bit, non-saturating):
 *   _outK   = _inK + _in(7-K)   for K = 0..3
 *   _out(7-K) = _inK - _in(7-K) for K = 0..3
 * Every input is read again by the subtraction half, so _out0.._out3 must
 * not be input registers, and _out4.._out6 must not alias an input that a
 * later subtraction still reads.
 */
.macro LASX_BUTTERFLY_8_H _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
|
_out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
|
xvadd.h \_out0, \_in0, \_in7 |
|
xvadd.h \_out1, \_in1, \_in6 |
|
xvadd.h \_out2, \_in2, \_in5 |
|
xvadd.h \_out3, \_in3, \_in4 |
|
xvsub.h \_out4, \_in3, \_in4 |
|
xvsub.h \_out5, \_in2, \_in5 |
|
xvsub.h \_out6, \_in1, \_in6 |
|
xvsub.h \_out7, \_in0, \_in7 |
|
.endm |
|
|
|
/*
 * LASX_BUTTERFLY_8_W _in0.._in7, _out0.._out7
 * 8-point butterfly on word elements (LASX, 256-bit, non-saturating):
 *   _outK   = _inK + _in(7-K)   for K = 0..3
 *   _out(7-K) = _inK - _in(7-K) for K = 0..3
 * Every input is read again by the subtraction half, so _out0.._out3 must
 * not be input registers, and _out4.._out6 must not alias an input that a
 * later subtraction still reads.
 */
.macro LASX_BUTTERFLY_8_W _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ |
|
_out0, _out1, _out2, _out3, _out4, _out5, _out6, _out7 |
|
xvadd.w \_out0, \_in0, \_in7 |
|
xvadd.w \_out1, \_in1, \_in6 |
|
xvadd.w \_out2, \_in2, \_in5 |
|
xvadd.w \_out3, \_in3, \_in4 |
|
xvsub.w \_out4, \_in3, \_in4 |
|
xvsub.w \_out5, \_in2, \_in5 |
|
xvsub.w \_out6, \_in1, \_in6 |
|
xvsub.w \_out7, \_in0, \_in7 |
|
.endm |
|
|