refined the msb positioning functions
migrated hcl_isint()/hcl_isbigint() to hcl-prv.h
This commit is contained in:
136
lib/hcl-utl.h
136
lib/hcl-utl.h
@ -685,8 +685,9 @@ HCL_EXPORT int hcl_ucwidth (
|
||||
);
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* =========================================================================
|
||||
* BIT SWAP
|
||||
* ========================================================================= */
|
||||
#if defined(HCL_HAVE_INLINE)
|
||||
|
||||
#if defined(HCL_HAVE_UINT16_T)
|
||||
@ -695,7 +696,10 @@ static HCL_INLINE hcl_uint16_t hcl_bswap16 (hcl_uint16_t x)
|
||||
#if defined(HCL_HAVE_BUILTIN_BSWAP16)
|
||||
return __builtin_bswap16(x);
|
||||
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
|
||||
__asm__ volatile ("xchgb %b0, %h0" : "=Q"(x): "0"(x));
|
||||
__asm__ /*volatile*/ ("xchgb %b0, %h0" : "=Q"(x): "0"(x));
|
||||
return x;
|
||||
#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
|
||||
__asm__ /*volatile*/ ("rev16 %0, %0" : "+r"(x));
|
||||
return x;
|
||||
#else
|
||||
return (x << 8) | (x >> 8);
|
||||
@ -709,7 +713,23 @@ static HCL_INLINE hcl_uint32_t hcl_bswap32 (hcl_uint32_t x)
|
||||
#if defined(HCL_HAVE_BUILTIN_BSWAP32)
|
||||
return __builtin_bswap32(x);
|
||||
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
|
||||
__asm__ volatile ("bswapl %0" : "=r"(x) : "0"(x));
|
||||
__asm__ /*volatile*/ ("bswapl %0" : "=r"(x) : "0"(x));
|
||||
return x;
|
||||
#elif defined(__GNUC__) && defined(__aarch64__)
|
||||
__asm__ /*volatile*/ ("rev32 %0, %0" : "+r"(x));
|
||||
return x;
|
||||
#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
|
||||
__asm__ /*volatile*/ ("rev %0, %0" : "+r"(x));
|
||||
return x;
|
||||
#elif defined(__GNUC__) && defined(__ARM_ARCH)
|
||||
hcl_uint32_t tmp;
|
||||
__asm__ /*volatile*/ (
|
||||
"eor %1, %0, %0, ror #16\n\t"
|
||||
"bic %1, %1, #0x00ff0000\n\t"
|
||||
"mov %0, %0, ror #8\n\t"
|
||||
"eor %0, %0, %1, lsr #8\n\t"
|
||||
:"+r"(x), "=&r"(tmp)
|
||||
);
|
||||
return x;
|
||||
#else
|
||||
return ((x >> 24)) |
|
||||
@ -726,7 +746,10 @@ static HCL_INLINE hcl_uint64_t hcl_bswap64 (hcl_uint64_t x)
|
||||
#if defined(HCL_HAVE_BUILTIN_BSWAP64)
|
||||
return __builtin_bswap64(x);
|
||||
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64))
|
||||
__asm__ volatile ("bswapq %0" : "=r"(x) : "0"(x));
|
||||
__asm__ /*volatile*/ ("bswapq %0" : "=r"(x) : "0"(x));
|
||||
return x;
|
||||
#elif defined(__GNUC__) && defined(__aarch64__)
|
||||
__asm__ /*volatile*/ ("rev %0, %0" : "+r"(x));
|
||||
return x;
|
||||
#else
|
||||
return ((x >> 56)) |
|
||||
@ -741,10 +764,12 @@ static HCL_INLINE hcl_uint64_t hcl_bswap64 (hcl_uint64_t x)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(HCL_HAVE_UINT128_T)
|
||||
static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
|
||||
{
|
||||
#if defined(HCL_HAVE_BUILTIN_BSWAP128)
|
||||
return __builtin_bswap128(x);
|
||||
#else
|
||||
return ((x >> 120)) |
|
||||
((x >> 104) & ((hcl_uint128_t)0xff << 8)) |
|
||||
((x >> 88) & ((hcl_uint128_t)0xff << 16)) |
|
||||
@ -761,23 +786,35 @@ static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
|
||||
((x << 88) & ((hcl_uint128_t)0xff << 104)) |
|
||||
((x << 104) & ((hcl_uint128_t)0xff << 112)) |
|
||||
((x << 120));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(HCL_HAVE_UINT16_T)
|
||||
# if defined(HCL_HAVE_BUILTIN_BSWAP16)
|
||||
# define hcl_bswap16(x) ((hcl_uint16_t)__builtin_bswap16((hcl_uint16_t)(x)))
|
||||
# else
|
||||
# define hcl_bswap16(x) ((hcl_uint16_t)(((hcl_uint16_t)(x)) << 8) | (((hcl_uint16_t)(x)) >> 8))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(HCL_HAVE_UINT32_T)
|
||||
# if defined(HCL_HAVE_BUILTIN_BSWAP32)
|
||||
# define hcl_bswap32(x) ((hcl_uint32_t)__builtin_bswap32((hcl_uint32_t)(x)))
|
||||
# else
|
||||
# define hcl_bswap32(x) ((hcl_uint32_t)(((((hcl_uint32_t)(x)) >> 24)) | \
|
||||
((((hcl_uint32_t)(x)) >> 8) & ((hcl_uint32_t)0xff << 8)) | \
|
||||
((((hcl_uint32_t)(x)) << 8) & ((hcl_uint32_t)0xff << 16)) | \
|
||||
((((hcl_uint32_t)(x)) << 24))))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(HCL_HAVE_UINT64_T)
|
||||
# if defined(HCL_HAVE_BUILTIN_BSWAP64)
|
||||
# define hcl_bswap64(x) ((hcl_uint64_t)__builtin_bswap64((hcl_uint64_t)(x)))
|
||||
# else
|
||||
# define hcl_bswap64(x) ((hcl_uint64_t)(((((hcl_uint64_t)(x)) >> 56)) | \
|
||||
((((hcl_uint64_t)(x)) >> 40) & ((hcl_uint64_t)0xff << 8)) | \
|
||||
((((hcl_uint64_t)(x)) >> 24) & ((hcl_uint64_t)0xff << 16)) | \
|
||||
@ -786,9 +823,13 @@ static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
|
||||
((((hcl_uint64_t)(x)) << 24) & ((hcl_uint64_t)0xff << 40)) | \
|
||||
((((hcl_uint64_t)(x)) << 40) & ((hcl_uint64_t)0xff << 48)) | \
|
||||
((((hcl_uint64_t)(x)) << 56))))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(HCL_HAVE_UINT128_T)
|
||||
# if defined(HCL_HAVE_BUILTIN_BSWAP128)
|
||||
# define hcl_bswap128(x) ((hcl_uint128_t)__builtin_bswap128((hcl_uint128_t)(x)))
|
||||
# else
|
||||
# define hcl_bswap128(x) ((hcl_uint128_t)(((((hcl_uint128_t)(x)) >> 120)) | \
|
||||
((((hcl_uint128_t)(x)) >> 104) & ((hcl_uint128_t)0xff << 8)) | \
|
||||
((((hcl_uint128_t)(x)) >> 88) & ((hcl_uint128_t)0xff << 16)) | \
|
||||
@ -805,6 +846,7 @@ static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
|
||||
((((hcl_uint128_t)(x)) << 88) & ((hcl_uint128_t)0xff << 104)) | \
|
||||
((((hcl_uint128_t)(x)) << 104) & ((hcl_uint128_t)0xff << 112)) | \
|
||||
((((hcl_uint128_t)(x)) << 120))))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* HCL_HAVE_INLINE */
|
||||
@ -891,7 +933,89 @@ static HCL_INLINE hcl_uint128_t hcl_bswap128 (hcl_uint128_t x)
|
||||
# error UNKNOWN ENDIAN
|
||||
#endif
|
||||
|
||||
/* =========================================================================
|
||||
* BIT POSITION
|
||||
* ========================================================================= */
|
||||
/* returns the 0-based bit position (counted from the least significant bit)
 * of the single set bit in 'x'. the caller must ensure that 'x' is a power
 * of 2. if 'x' happens to be zero, the return value is undefined as each
 * method used may give a different result. */
static HCL_INLINE int hcl_get_pos_of_msb_set_pow2 (hcl_oow_t x)
{
#if defined(HCL_HAVE_BUILTIN_CTZLL) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_LONG_LONG)
	return __builtin_ctzll(x); /* count the number of trailing zeros */
#elif defined(HCL_HAVE_BUILTIN_CTZL) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_LONG)
	return __builtin_ctzl(x); /* count the number of trailing zeros */
#elif defined(HCL_HAVE_BUILTIN_CTZ) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_INT)
	return __builtin_ctz(x); /* count the number of trailing zeros */
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
	hcl_oow_t pos;
	/* use the Bit Scan Forward instruction. for a power of 2, the lowest
	 * set bit found by BSF is also the most significant set bit. */
#if 1
	__asm__ volatile (
		"bsf %1,%0\n\t"
		: "=r"(pos) /* output */
		: "r"(x) /* input */
	);
#else
	__asm__ volatile (
		"bsf %[X],%[EXP]\n\t"
		: [EXP]"=r"(pos) /* output */
		: [X]"r"(x) /* input */
	);
#endif
	return (int)pos;
#elif defined(__GNUC__) && (defined(__aarch64__) || (defined(__arm__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 5)))
	/* [fix] parenthesized the architecture alternatives. in the original
	 * condition `defined(__GNUC__) && defined(__aarch64__) || (...)',
	 * `&&' binds tighter than `||', so a non-GNU arm compiler could
	 * reach this GNU inline-assembly block. */
	hcl_oow_t n;
	/* CLZ is available in ARMv5T and above. there is no instruction to
	 * count trailing zeros or something similar. using RBIT with CLZ
	 * would be good in ARMv6T2 and above to avoid further calculation
	 * after CLZ */
	__asm__ volatile (
		"clz %0,%1\n\t"
		: "=r"(n) /* output */
		: "r"(x) /* input */
	);
	/* for a power of 2, the msb position equals the trailing-zero count */
	return (int)(HCL_OOW_BITS - n - 1);
	/* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */
#else
	/* portable fallback: shift right until the set bit falls off */
	int pos = 0;
	while (x >>= 1) pos++;
	return pos;
#endif
}
|
||||
|
||||
/* returns the 0-based bit position (counted from the least significant bit)
 * of the most significant set bit in 'x'. 'x' doesn't have to be a power
 * of 2. if 'x' is zero, the result is undefined. */
static HCL_INLINE int hcl_get_pos_of_msb_set (hcl_oow_t x)
{
#if defined(HCL_HAVE_BUILTIN_CLZLL) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_LONG_LONG)
	return HCL_OOW_BITS - __builtin_clzll(x) - 1; /* count the number of leading zeros */
#elif defined(HCL_HAVE_BUILTIN_CLZL) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_LONG)
	return HCL_OOW_BITS - __builtin_clzl(x) - 1; /* count the number of leading zeros */
#elif defined(HCL_HAVE_BUILTIN_CLZ) && (HCL_SIZEOF_OOW_T == HCL_SIZEOF_INT)
	return HCL_OOW_BITS - __builtin_clz(x) - 1; /* count the number of leading zeros */
#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
	/* bit scan reverse. not all x86 CPUs have LZCNT. */
	hcl_oow_t pos;
	__asm__ volatile (
		"bsr %1,%0\n\t"
		: "=r"(pos) /* output */
		: "r"(x) /* input */
	);
	return (int)pos;
#elif defined(__GNUC__) && (defined(__aarch64__) || (defined(__arm__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 5)))
	/* [fix] parenthesized the architecture alternatives. in the original
	 * condition `defined(__GNUC__) && defined(__aarch64__) || (...)',
	 * `&&' binds tighter than `||', so a non-GNU arm compiler could
	 * reach this GNU inline-assembly block. */
	hcl_oow_t n;
	/* count leading zeros and convert to the msb position */
	__asm__ volatile (
		"clz %0,%1\n\t"
		: "=r"(n) /* output */
		: "r"(x) /* input */
	);
	return (int)(HCL_OOW_BITS - n - 1);
	/* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */
#else
	/* portable fallback: shift right until the msb falls off */
	int pos = 0;
	while (x >>= 1) pos++;
	return pos;
#endif
}
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user