/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <[email protected]>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "regdef.h" | |
/* Some nicer register names. */ | |
#define ta t10 | |
#define tb t11 | |
#define tc t12 | |
#define td AT | |
/* Danger: these overlap with the argument list and the return value */ | |
#define te a5 | |
#define tf a4 | |
#define tg a3 | |
#define th v0 | |
noat | |
noreorder | |
.arch pca56 | |
/*****************************************************************************
 * int pix_abs16x16_mvi_asm(const uint8_t *pix1, const uint8_t *pix2, int line_size)
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
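
/* For reference, a minimal C sketch of what this routine accumulates
 * (illustrative only; the name "sad16_c_ref" and its prototype are an
 * assumption made for this comment, not part of the real interface).
 * perr sums the absolute differences of the eight byte pairs in two
 * quadwords, so two perrs cover one 16-pixel line; a4 carries the line
 * count h, which the loops below decrement.
 *
 *     static int sad16_c_ref(const uint8_t *pix1, const uint8_t *pix2,
 *                            int line_size, int h)
 *     {
 *         int sum = 0;
 *         for (; h > 0; h--) {
 *             for (int i = 0; i < 16; i++) {
 *                 int d = pix1[i] - pix2[i];
 *                 sum += d < 0 ? -d : d;      // |pix1[i] - pix2[i]|
 *             }
 *             pix1 += line_size;              // advance both rows by the stride
 *             pix2 += line_size;
 *         }
 *         return sum;
 *     }
 */
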
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

        and     a2, 7, t0
        clr     v0
        beq     t0, $aligned

        .align 4
$unaligned:
        /* Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */
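
        /* How the 16 unaligned source bytes are assembled each line, in
           C-like pseudocode (illustrative only, assuming a nonzero byte
           misalignment; d = 8 * (a2 & 7) is that misalignment in bits):

               left  = (left_u >> d) | (mid     << (64 - d));   // extql | extqh
               right = (mid    >> d) | (right_u << (64 - d));   // extql | extqh

           The middle quadword supplies the high bytes of "left" and the
           low bytes of "right"; each perr then sums the absolute
           differences of eight byte pairs against the aligned reference
           row loaded from a1. */
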
        /* load line 0 */
        ldq_u   t0, 0(a2)       # left_u
        ldq_u   t1, 8(a2)       # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3, 0(a1)       # ref left
        ldq     t4, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* load line 1 */
        ldq_u   t5, 0(a2)       # left_u
        ldq_u   t6, 8(a2)       # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8, 0(a1)       # ref left
        ldq     t9, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* calc line 0 */
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a2, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a2, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a4, 2, a4       # h -= 2
        bne     a4, $unaligned
        ret

        .align 4
$aligned:
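        /* This path is taken when pix2 is quadword aligned, so plain ldq
           loads are used and no extract/merge step is needed; the shorter
           per-line sequence lets the loop process four lines per
           iteration instead of two. */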
        /* load line 0 */
        ldq     t0, 0(a2)       # left
        ldq     t1, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t2, 0(a1)       # ref left
        ldq     t3, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 1 */
        ldq     t4, 0(a2)       # left
        ldq     t5, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t6, 0(a1)       # ref left
        ldq     t7, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 2 */
        ldq     t8, 0(a2)       # left
        ldq     t9, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     ta, 0(a1)       # ref left
        ldq     tb, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 3 */
        ldq     tc, 0(a2)       # left
        ldq     td, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     te, 0(a1)       # ref left
        ldq     a0, 8(a1)       # ref right
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a1, a3, a1      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, a0, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a4, 4, a4       # h -= 4
        bne     a4, $aligned
        ret
        .end pix_abs16x16_mvi_asm