refined the msb positioning functions

migrated hcl_isint()/hcl_isbigint() to hcl-prv.h
This commit is contained in:
2019-05-04 17:56:45 +00:00
parent a58f9e47b0
commit 6e5a92a041
16 changed files with 282 additions and 263 deletions

View File

@ -685,8 +685,9 @@ HCL_EXPORT int hcl_ucwidth (
);
/* ------------------------------------------------------------------------- */
/* =========================================================================
* BIT SWAP
* ========================================================================= */
#if defined(HCL_HAVE_INLINE)
#if defined(HCL_HAVE_UINT16_T)
@ -695,7 +696,10 @@ static HCL_INLINE hcl_uint16_t hcl_bswap16 (hcl_uint16_t x)
#if defined(HCL_HAVE_BUILTIN_BSWAP16)
return __builtin_bswap16(x);
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
__asm__ volatile ("xchgb %b0, %h0" : "=Q"(x): "0"(x));
__asm__ /*volatile*/ ("xchgb %b0, %h0" : "=Q"(x): "0"(x));
return x;
#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
__asm__ /*volatile*/ ("rev16 %0, %0" : "+r"(x));
return x;
#else
return (x << 8) | (x >> 8);
@ -709,7 +713,23 @@ static HCL_INLINE hcl_uint32_t hcl_bswap32 (hcl_uint32_t x)
#if defined(HCL_HAVE_BUILTIN_BSWAP32)
return __builtin_bswap32(x);
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
__asm__ volatile ("bswapl %0" : "=r"(x) : "0"(x));
__asm__ /*volatile*/ ("bswapl %0" : "=r"(x) : "0"(x));
return x;
#elif defined(__GNUC__) && defined(__aarch64__)
__asm__ /*volatile*/ ("rev32 %0, %0" : "+r"(x));
return x;
#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
__asm__ /*volatile*/ ("rev %0, %0" : "+r"(x));
return x;
#elif defined(__GNUC__) && defined(__ARM_ARCH)
hcl_uint32_t tmp;
__asm__ /*volatile*/ (
"eor %1, %0, %0, ror #16\n\t"
"bic %1, %1, #0x00ff0000\n\t"
"mov %0, %0, ror #8\n\t"
"eor %0, %0, %1, lsr #8\n\t"
:"+r"(x), "=&r"(tmp)
);
return x;
#else
return ((x >> 24)) |
@ -726,7 +746,10 @@ static HCL_INLINE hcl_uint64_t hcl_bswap64 (hcl_uint64_t x)
#if defined(HCL_HAVE_BUILTIN_BSWAP64)
return __builtin_bswap64(x);
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64))
__asm__ volatile ("bswapq %0" : "=r"(x) : "0"(x));
__asm__ /*volatile*/ ("bswapq %0" : "=r"(x) : "0"(x));
return x;
#elif defined(__GNUC__) && defined(__aarch64__)
__asm__ /*volatile*/ ("rev %0, %0" : "+r"(x));
return x;
#else
return ((x >> 56)) |
@ -741,10 +764,12 @@ static HCL_INLINE hcl_uint64_t hcl_bswap64 (hcl_uint64_t x)
}
#endif
#if defined(HCL_HAVE_UINT128_T)
static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
{
#if defined(HCL_HAVE_BUILTIN_BSWAP128)
return __builtin_bswap128(x);
#else
return ((x >> 120)) |
((x >> 104) & ((hcl_uint128_t)0xff << 8)) |
((x >> 88) & ((hcl_uint128_t)0xff << 16)) |
@ -761,23 +786,35 @@ static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
((x << 88) & ((hcl_uint128_t)0xff << 104)) |
((x << 104) & ((hcl_uint128_t)0xff << 112)) |
((x << 120));
#endif
}
#endif
#else
#if defined(HCL_HAVE_UINT16_T)
# if defined(HCL_HAVE_BUILTIN_BSWAP16)
# define hcl_bswap16(x) ((hcl_uint16_t)__builtin_bswap16((hcl_uint16_t)(x)))
# else
# define hcl_bswap16(x) ((hcl_uint16_t)(((hcl_uint16_t)(x)) << 8) | (((hcl_uint16_t)(x)) >> 8))
# endif
#endif
#if defined(HCL_HAVE_UINT32_T)
# if defined(HCL_HAVE_BUILTIN_BSWAP32)
# define hcl_bswap32(x) ((hcl_uint32_t)__builtin_bswap32((hcl_uint32_t)(x)))
# else
# define hcl_bswap32(x) ((hcl_uint32_t)(((((hcl_uint32_t)(x)) >> 24)) | \
((((hcl_uint32_t)(x)) >> 8) & ((hcl_uint32_t)0xff << 8)) | \
((((hcl_uint32_t)(x)) << 8) & ((hcl_uint32_t)0xff << 16)) | \
((((hcl_uint32_t)(x)) << 24))))
# endif
#endif
#if defined(HCL_HAVE_UINT64_T)
# if defined(HCL_HAVE_BUILTIN_BSWAP64)
# define hcl_bswap64(x) ((hcl_uint64_t)__builtin_bswap64((hcl_uint64_t)(x)))
# else
# define hcl_bswap64(x) ((hcl_uint64_t)(((((hcl_uint64_t)(x)) >> 56)) | \
((((hcl_uint64_t)(x)) >> 40) & ((hcl_uint64_t)0xff << 8)) | \
((((hcl_uint64_t)(x)) >> 24) & ((hcl_uint64_t)0xff << 16)) | \
@ -786,9 +823,13 @@ static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
((((hcl_uint64_t)(x)) << 24) & ((hcl_uint64_t)0xff << 40)) | \
((((hcl_uint64_t)(x)) << 40) & ((hcl_uint64_t)0xff << 48)) | \
((((hcl_uint64_t)(x)) << 56))))
# endif
#endif
#if defined(HCL_HAVE_UINT128_T)
# if defined(HCL_HAVE_BUILTIN_BSWAP128)
# define hcl_bswap128(x) ((hcl_uint128_t)__builtin_bswap128((hcl_uint128_t)(x)))
# else
# define hcl_bswap128(x) ((hcl_uint128_t)(((((hcl_uint128_t)(x)) >> 120)) | \
((((hcl_uint128_t)(x)) >> 104) & ((hcl_uint128_t)0xff << 8)) | \
((((hcl_uint128_t)(x)) >> 88) & ((hcl_uint128_t)0xff << 16)) | \
@ -805,6 +846,7 @@ static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
((((hcl_uint128_t)(x)) << 88) & ((hcl_uint128_t)0xff << 104)) | \
((((hcl_uint128_t)(x)) << 104) & ((hcl_uint128_t)0xff << 112)) | \
((((hcl_uint128_t)(x)) << 120))))
# endif
#endif
#endif /* HCL_HAVE_INLINE */
@ -891,7 +933,89 @@ static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
# error UNKNOWN ENDIAN
#endif
/* =========================================================================
* BIT POSITION
* ========================================================================= */
static HCL_INLINE int hcl_get_pos_of_msb_set_pow2 (hcl_oow_t x)
{
/* the caller must ensure that x is power of 2. if x happens to be zero,
* the return value is undefined as each method used may give different result. */
#if defined(HCL_HAVE_BUILTIN_CTZLL) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_LONG_LONG)
return __builtin_ctzll(x); /* count the number of trailing zeros */
#elif defined(HCL_HAVE_BUILTIN_CTZL) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_LONG)
return __builtin_ctzl(x); /* count the number of trailing zeros */
#elif defined(HCL_HAVE_BUILTIN_CTZ) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_INT)
return __builtin_ctz(x); /* count the number of trailing zeros */
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
hcl_oow_t pos;
/* use the Bit Scan Forward instruction */
#if 1
__asm__ volatile (
"bsf %1,%0\n\t"
: "=r"(pos) /* output */
: "r"(x) /* input */
);
#else
__asm__ volatile (
"bsf %[X],%[EXP]\n\t"
: [EXP]"=r"(pos) /* output */
: [X]"r"(x) /* input */
);
#endif
return (int)pos;
#elif defined(__GNUC__) && defined(__aarch64__) || (defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 5)))
hcl_oow_t n;
/* CLZ is available in ARMv5T and above. there is no instruction to
* count trailing zeros or something similar. using RBIT with CLZ
* would be good in ARMv6T2 and above to avoid further calculation
* afte CLZ */
__asm__ volatile (
"clz %0,%1\n\t"
: "=r"(n) /* output */
: "r"(x) /* input */
);
return (int)(HCL_OOW_BITS - n - 1);
/* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */
#else
int pos = 0;
while (x >>= 1) pos++;
return pos;
#endif
}
static HCL_INLINE int hcl_get_pos_of_msb_set (hcl_oow_t x)
{
/* x doesn't have to be power of 2. if x is zero, the result is undefined */
#if defined(HCL_HAVE_BUILTIN_CLZLL) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_LONG_LONG)
return HCL_OOW_BITS - __builtin_clzll(x) - 1; /* count the number of leading zeros */
#elif defined(HCL_HAVE_BUILTIN_CLZL) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_LONG)
return HCL_OOW_BITS - __builtin_clzl(x) - 1; /* count the number of leading zeros */
#elif defined(HCL_HAVE_BUILTIN_CLZ) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_INT)
return HCL_OOW_BITS - __builtin_clz(x) - 1; /* count the number of leading zeros */
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
/* bit scan reverse. not all x86 CPUs have LZCNT. */
hcl_oow_t pos;
__asm__ volatile (
"bsr %1,%0\n\t"
: "=r"(pos) /* output */
: "r"(x) /* input */
);
return (int)pos;
#elif defined(__GNUC__) && defined(__aarch64__) || (defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 5)))
hcl_oow_t n;
__asm__ volatile (
"clz %0,%1\n\t"
: "=r"(n) /* output */
: "r"(x) /* input */
);
return (int)(HCL_OOW_BITS - n - 1);
/* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */
#else
int pos = 0;
while (x >>= 1) pos++;
return pos;
#endif
}
#if defined(__cplusplus)
}
#endif