From 80b05e9d95e24c00da2eec8e4271fc95f1a3938c Mon Sep 17 00:00:00 2001
From: "hyunghwan.chung"
Date: Sat, 4 May 2019 02:16:25 +0000
Subject: [PATCH] used arm inline assembly in some byte swap functions

---
Review notes (text between "---" and the diffstat is not part of the commit):
 - moo_bswap16/32/64: every newly added ARM/AArch64 branch now ends with
   "return x;". Previously those preprocessor paths executed the asm and
   then reached #else/#endif without any return statement.
 - moo_bswap32: 32-bit ARM (A32/T32) has no core-register REV32; the branch
   guarded by __ARM_ARCH >= 6 now emits REV. AArch64 also defines __ARM_ARCH,
   so it gets its own branch placed first, using REV on the 32-bit (%w0)
   view of the operand register.
 - moo_bswap64: emits REV on the X register rather than the REV64 spelling.
   NOTE(review): REV is the base A64 mnemonic; whether every assembler in
   use accepts REV64 for a general register was not confirmed.
 - Hunk offsets/counts and the diffstat were recomputed for the added lines.
   The post-image blob id on the moo-utl.h "index" line was NOT recomputed.
 - Indentation was rebuilt from a whitespace-collapsed copy of this patch;
   verify it against the tree or apply with --ignore-whitespace.
 - NOTE(review): the __ARM_ARCH fallback in moo-cmn.h only tests the
   __ARM_ARCH_<n>__ spellings; profile-suffixed macros such as
   __ARM_ARCH_7A__ are not covered. Left as is; confirm whether needed.

 moo/lib/bigint.c  |  4 ++--
 moo/lib/moo-cmn.h | 17 +++++++++++++++++
 moo/lib/moo-utl.h | 28 +++++++++++++++++++++++++---
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/moo/lib/bigint.c b/moo/lib/bigint.c
index 74812db..5abc831 100644
--- a/moo/lib/bigint.c
+++ b/moo/lib/bigint.c
@@ -161,7 +161,7 @@ static MOO_INLINE int get_pos_of_msb_set_pow2 (moo_oow_t x)
 	);
 #endif
 	return (int)pos;
-#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__))
+#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 5))
 	moo_oow_t n;
 	/* CLZ is available in ARMv5T and above. there is no instruction to
 	 * count trailing zeros or something similar. using RBIT with CLZ
@@ -199,7 +199,7 @@ static MOO_INLINE int get_pos_of_msb_set (moo_oow_t x)
 		: "r"(x) /* input */
 	);
 	return (int)pos;
-#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__))
+#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 5))
 	moo_oow_t n;
 	__asm__ volatile (
 		"clz %0,%1\n\t"
diff --git a/moo/lib/moo-cmn.h b/moo/lib/moo-cmn.h
index 6f3f227..de27633 100644
--- a/moo/lib/moo-cmn.h
+++ b/moo/lib/moo-cmn.h
@@ -45,6 +45,11 @@
 #	error UNSUPPORTED SYSTEM
 #endif
 
+
+/* =========================================================================
+ * ARCHITECTURE/COMPILER TWEAKS
+ * ========================================================================= */
+
 #if defined(EMSCRIPTEN)
 #	if defined(MOO_SIZEOF___INT128)
 #		undef MOO_SIZEOF___INT128
@@ -57,6 +62,18 @@
 #	endif
 #endif
 
+#if defined(__GNUC__) && defined(__arm__) && !defined(__ARM_ARCH)
+#	if defined(__ARM_ARCH_8__)
+#		define __ARM_ARCH 8
+#	elif defined(__ARM_ARCH_7__)
+#		define __ARM_ARCH 7
+#	elif defined(__ARM_ARCH_6__)
+#		define __ARM_ARCH 6
+#	elif defined(__ARM_ARCH_5__)
+#		define __ARM_ARCH 5
+#	endif
+#endif
+
 /* =========================================================================
  * PRIMITIVE TYPE DEFINTIONS
  * ========================================================================= */
diff --git a/moo/lib/moo-utl.h b/moo/lib/moo-utl.h
index 509e179..a7a8278 100644
--- a/moo/lib/moo-utl.h
+++ b/moo/lib/moo-utl.h
@@ -746,8 +746,11 @@ static MOO_INLINE moo_uint16_t moo_bswap16 (moo_uint16_t x)
 #if defined(MOO_HAVE_BUILTIN_BSWAP16)
 	return __builtin_bswap16(x);
 #elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
-	__asm__ volatile ("xchgb %b0, %h0" : "=Q"(x): "0"(x));
+	__asm__ /*volatile*/ ("xchgb %b0, %h0" : "=Q"(x): "0"(x));
 	return x;
+#elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
+	__asm__ /*volatile*/ ("rev16 %0, %0" : "+r"(x)); /* swap the two bytes of each halfword */
+	return x;
 #else
 	return (x << 8) | (x >> 8);
 #endif
@@ -760,7 +763,23 @@ static MOO_INLINE moo_uint32_t moo_bswap32 (moo_uint32_t x)
 #if defined(MOO_HAVE_BUILTIN_BSWAP32)
 	return __builtin_bswap32(x);
 #elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
-	__asm__ volatile ("bswapl %0" : "=r"(x) : "0"(x));
+	__asm__ /*volatile*/ ("bswapl %0" : "=r"(x) : "0"(x));
+	return x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+	__asm__ /*volatile*/ ("rev %w0, %w0" : "+r"(x)); /* %w0 selects the 32-bit view of the register */
+	return x;
+#elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
+	__asm__ /*volatile*/ ("rev %0, %0" : "+r"(x)); /* A32/T32 word byte-reverse is REV, not REV32 */
+	return x;
+#elif defined(__GNUC__) && defined(__ARM_ARCH)
+	moo_uint32_t tmp;
+	__asm__ /*volatile*/ (
+		"eor %1, %0, %0, ror #16\n\t"
+		"bic %1, %1, #0x00ff0000\n\t"
+		"mov %0, %0, ror #8\n\t"
+		"eor %0, %0, %1, lsr #8\n\t"
+		:"+r"(x), "=&r"(tmp)
+	);
 	return x;
 #else
 	return ((x >> 24)) |
@@ -777,8 +796,11 @@ static MOO_INLINE moo_uint64_t moo_bswap64 (moo_uint64_t x)
 #if defined(MOO_HAVE_BUILTIN_BSWAP64)
 	return __builtin_bswap64(x);
 #elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64))
-	__asm__ volatile ("bswapq %0" : "=r"(x) : "0"(x));
+	__asm__ /*volatile*/ ("bswapq %0" : "=r"(x) : "0"(x));
 	return x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+	__asm__ /*volatile*/ ("rev %0, %0" : "+r"(x)); /* full 64-bit byte reverse on the X register */
+	return x;
 #else
 	return ((x >> 56)) |
 	       ((x >> 40) & ((moo_uint64_t)0xff << 8)) |