camenduru
/

ffmpeg-cuda

Model card Files Files and versions Community

ffmpeg-cuda / libavcodec /arm /idctdsp_arm.S

camenduru's picture

thanks to ffmpeg ❤

8ead80b almost 2 years ago

history blame contribute delete

4.43 kB

	@
	@ ARMv4-optimized IDCT functions
	@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
	@
	@ This file is part of FFmpeg.
	@
	@ FFmpeg is free software; you can redistribute it and/or
	@ modify it under the terms of the GNU Lesser General Public
	@ License as published by the Free Software Foundation; either
	@ version 2.1 of the License, or (at your option) any later version.
	@
	@ FFmpeg is distributed in the hope that it will be useful,
	@ but WITHOUT ANY WARRANTY; without even the implied warranty of
	@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	@ Lesser General Public License for more details.
	@
	@ You should have received a copy of the GNU Lesser General Public
	@ License along with FFmpeg; if not, write to the Free Software
	@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	@

	#include "config.h"
	#include "libavutil/arm/asm.S"

	@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, ptrdiff_t stride)
	@
	@ For each of 8 rows: add 8 signed 16-bit coefficients read from block to
	@ the 8 bytes at dest, saturate every sum to 0..255 and store the bytes
	@ back. block advances by 16 bytes and dest by stride after every row.
	@
	@ In:    r0 = block, r1 = dest, r2 = stride
	@ Out:   nothing in registers; dest is updated in place
	@ Saved: r4-r10 are pushed on entry and popped before returning
	@
	@ Registers inside the loop:
	@   r4      word holding four dest pixels
	@   r5, r7  current pair of coefficients, later their bitwise complements
	@   r6, r8  pixel + coefficient sums, then the saturated bytes
	@   r9      output word being assembled
	@   r10     rows remaining
	@
	@ dest is accessed with 32-bit loads and stores; the low byte of each word
	@ is paired with the lowest-numbered coefficient of the group.
	@
	@ Saturation: cmp sum, #0xFF followed by a move under the hi (unsigned
	@ higher) condition fires both for sum > 255 and for sum < 0, because a
	@ negative sum is a very large unsigned number. The replacement byte is
	@ (~coeff) >> 24: a sum below 0 implies a negative coefficient, whose
	@ complement has a clear top byte (result 0); a sum above 255 implies a
	@ positive coefficient, whose complement has an all-ones top byte
	@ (result 0xFF). Testing only bit 8 of the sum would be enough for
	@ coefficients within -256..256 but lets larger ones spill into the
	@ neighbouring pixel bytes; the unsigned compare covers the whole int16_t
	@ range with the same number of instructions.
	@
	@ The [A]..[F] markers show where a hoisted instruction logically belongs;
	@ the loads and the loop bookkeeping are issued early to hide latency.
	function ff_add_pixels_clamped_arm, export=1, align=5
	push            {r4-r10}
	mov             r10, #8                 /* 8 rows */
	1:
	ldr             r4,  [r1]               /* load dest[0..3] */
	/* block[0] and block[1] */
	ldrsh           r5,  [r0]
	ldrsh           r7,  [r0, #2]
	and             r6,  r4,  #0xFF
	and             r8,  r4,  #0xFF00
	add             r6,  r6,  r5            /* pixel 0 + block[0] */
	add             r8,  r7,  r8,  lsr #8   /* pixel 1 + block[1] */
	mvn             r5,  r5
	mvn             r7,  r7
	cmp             r6,  #0xFF              /* outside 0..255 ? */
	it              hi
	movhi           r6,  r5,  lsr #24       /* 0 or 0xFF by coefficient sign */
	cmp             r8,  #0xFF
	it              hi
	movhi           r8,  r7,  lsr #24
	mov             r9,  r6
	ldrsh           r5,  [r0, #4]           /* moved from [A] */
	orr             r9,  r9,  r8,  lsl #8
	/* block[2] and block[3] */
	/* [A] */
	ldrsh           r7,  [r0, #6]
	and             r6,  r4,  #0xFF0000
	and             r8,  r4,  #0xFF000000
	add             r6,  r5,  r6,  lsr #16  /* pixel 2 + block[2] */
	add             r8,  r7,  r8,  lsr #24  /* pixel 3 + block[3] */
	mvn             r5,  r5
	mvn             r7,  r7
	cmp             r6,  #0xFF
	it              hi
	movhi           r6,  r5,  lsr #24
	cmp             r8,  #0xFF
	it              hi
	movhi           r8,  r7,  lsr #24
	orr             r9,  r9,  r6,  lsl #16
	ldr             r4,  [r1, #4]           /* moved from [B] */
	orr             r9,  r9,  r8,  lsl #24
	/* store dest[0..3] */
	ldrsh           r5,  [r0, #8]           /* moved from [C] */
	str             r9,  [r1]

	/* load dest[4..7] */
	/* [B] */
	/* block[4] and block[5] */
	/* [C] */
	ldrsh           r7,  [r0, #10]
	and             r6,  r4,  #0xFF
	and             r8,  r4,  #0xFF00
	add             r6,  r6,  r5            /* pixel 4 + block[4] */
	add             r8,  r7,  r8,  lsr #8   /* pixel 5 + block[5] */
	mvn             r5,  r5
	mvn             r7,  r7
	cmp             r6,  #0xFF
	it              hi
	movhi           r6,  r5,  lsr #24
	cmp             r8,  #0xFF
	it              hi
	movhi           r8,  r7,  lsr #24
	mov             r9,  r6
	ldrsh           r5,  [r0, #12]          /* moved from [D] */
	orr             r9,  r9,  r8,  lsl #8
	/* block[6] and block[7] */
	/* [D] */
	ldrsh           r7,  [r0, #14]
	and             r6,  r4,  #0xFF0000
	and             r8,  r4,  #0xFF000000
	add             r6,  r5,  r6,  lsr #16  /* pixel 6 + block[6] */
	add             r8,  r7,  r8,  lsr #24  /* pixel 7 + block[7] */
	mvn             r5,  r5
	mvn             r7,  r7
	cmp             r6,  #0xFF
	it              hi
	movhi           r6,  r5,  lsr #24
	cmp             r8,  #0xFF
	it              hi
	movhi           r8,  r7,  lsr #24
	orr             r9,  r9,  r6,  lsl #16
	add             r0,  r0,  #16           /* moved from [E]: next 8 coefficients */
	orr             r9,  r9,  r8,  lsl #24
	subs            r10, r10, #1            /* moved from [F]: sets flags for bne */
	/* store dest[4..7] */
	str             r9,  [r1, #4]

	/* [E] */
	/* [F] */
	add             r1,  r1,  r2            /* next row; leaves the flags alone */
	bne             1b

	pop             {r4-r10}
	bx              lr
	endfunc