File size: 7,725 Bytes
8ead80b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
/*
* Alpha optimized DSP utils
* Copyright (c) 2002 Falk Hueffner <[email protected]>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_ALPHA_ASM_H
#define AVCODEC_ALPHA_ASM_H
#include <inttypes.h>
#include "libavutil/common.h"
#if AV_GCC_VERSION_AT_LEAST(2,96)
# define likely(x) __builtin_expect((x) != 0, 1)
# define unlikely(x) __builtin_expect((x) != 0, 0)
#else
# define likely(x) (x)
# define unlikely(x) (x)
#endif
#define AMASK_BWX (1 << 0)
#define AMASK_FIX (1 << 1)
#define AMASK_CIX (1 << 2)
#define AMASK_MVI (1 << 8)
static inline uint64_t BYTE_VEC(uint64_t x)
{
x |= x << 8;
x |= x << 16;
x |= x << 32;
return x;
}
static inline uint64_t WORD_VEC(uint64_t x)
{
x |= x << 16;
x |= x << 32;
return x;
}
#define sextw(x) ((int16_t) (x))
#ifdef __GNUC__
#define ldq(p) \
(((const union { \
uint64_t __l; \
__typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \
} *) (p))->__l)
#define ldl(p) \
(((const union { \
int32_t __l; \
__typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \
} *) (p))->__l)
#define stq(l, p) \
do { \
(((union { \
uint64_t __l; \
__typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \
} *) (p))->__l) = l; \
} while (0)
#define stl(l, p) \
do { \
(((union { \
int32_t __l; \
__typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \
} *) (p))->__l) = l; \
} while (0)
struct unaligned_long { uint64_t l; } __attribute__((packed));
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
#define uldq(a) (((const struct unaligned_long *) (a))->l)
#if AV_GCC_VERSION_AT_LEAST(3,3)
#define prefetch(p) __builtin_prefetch((p), 0, 1)
#define prefetch_en(p) __builtin_prefetch((p), 0, 0)
#define prefetch_m(p) __builtin_prefetch((p), 1, 1)
#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
#define cmpbge __builtin_alpha_cmpbge
/* Avoid warnings. */
#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b))
#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b))
#define zap __builtin_alpha_zap
#define zapnot __builtin_alpha_zapnot
#define amask __builtin_alpha_amask
#define implver __builtin_alpha_implver
#define rpcc __builtin_alpha_rpcc
#else
#define prefetch(p) __asm__ volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
#define prefetch_en(p) __asm__ volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
#define prefetch_m(p) __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
#define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
#define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extql(a, b) ({ uint64_t __r; __asm__ ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extwl(a, b) ({ uint64_t __r; __asm__ ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extqh(a, b) ({ uint64_t __r; __asm__ ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define zap(a, b) ({ uint64_t __r; __asm__ ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define amask(a) ({ uint64_t __r; __asm__ ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
#define implver() ({ uint64_t __r; __asm__ ("implver %0" : "=r" (__r)); __r; })
#define rpcc() ({ uint64_t __r; __asm__ volatile ("rpcc %0" : "=r" (__r)); __r; })
#endif
#define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory")
#if AV_GCC_VERSION_AT_LEAST(3,3) && defined(__alpha_max__)
#define minub8 __builtin_alpha_minub8
#define minsb8 __builtin_alpha_minsb8
#define minuw4 __builtin_alpha_minuw4
#define minsw4 __builtin_alpha_minsw4
#define maxub8 __builtin_alpha_maxub8
#define maxsb8 __builtin_alpha_maxsb8
#define maxuw4 __builtin_alpha_maxuw4
#define maxsw4 __builtin_alpha_maxsw4
#define perr __builtin_alpha_perr
#define pklb __builtin_alpha_pklb
#define pkwb __builtin_alpha_pkwb
#define unpkbl __builtin_alpha_unpkbl
#define unpkbw __builtin_alpha_unpkbw
#else
#define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define perr(a, b) ({ uint64_t __r; __asm__ (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
#define pklb(a) ({ uint64_t __r; __asm__ (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define pkwb(a) ({ uint64_t __r; __asm__ (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbl(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbw(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#endif
#else
#error "Unknown compiler!"
#endif
#endif /* AVCODEC_ALPHA_ASM_H */
|