migrated msb positioning functions to moo-utl.h

2019-05-04 04:27:27 +00:00 · 2019-05-04 04:27:27 +00:00 · eba44fc039
commit eba44fc039
parent 80b05e9d95
12 changed files with 193 additions and 119 deletions
--- a/moo/lib/bigint.c
+++ b/moo/lib/bigint.c
@ -134,96 +134,14 @@ static const moo_uint8_t debruijn_64[64] =
 #	define LOG2_FOR_POW2_64(x) (debruijn_64[(moo_uint64_t)((moo_uint64_t)(x) * 0x022fdd63cc95386d) >> 58])
 #endif

-static MOO_INLINE int get_pos_of_msb_set_pow2 (moo_oow_t x)
-{
-	/* the caller must ensure that x is power of 2. if x happens to be zero,
-	 * the return value is undefined as each method used may give different result. */
-#if defined(MOO_HAVE_BUILTIN_CTZLL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG_LONG)
-	return __builtin_ctzll(x); /* count the number of trailing zeros */
-#elif defined(MOO_HAVE_BUILTIN_CTZL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG)
-	return __builtin_ctzl(x); /* count the number of trailing zeros */
-#elif defined(MOO_HAVE_BUILTIN_CTZ) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_INT)
-	return __builtin_ctz(x); /* count the number of trailing zeros */
-#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
-	moo_oow_t pos;
-	/* use the Bit Scan Forward instruction */
-#if 1
-	__asm__ volatile (
-		"bsf %1,%0\n\t"
-		: "=r"(pos) /* output */
-		: "r"(x) /* input */
-	);
-#else
-	__asm__ volatile (
-		"bsf %[X],%[EXP]\n\t"
-		: [EXP]"=r"(pos) /* output */
-		: [X]"r"(x) /* input */
-	);
-#endif
-	return (int)pos;
-#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 5))
-	moo_oow_t n;
-	/* CLZ is available in ARMv5T and above. there is no instruction to
-	 * count trailing zeros or something similar. using RBIT with CLZ
-	 * would be good in ARMv6T2 and above to avoid further calculation
-	 * afte CLZ */
-	__asm__ volatile (
-		"clz %0,%1\n\t"
-		: "=r"(n) /* output */
-		: "r"(x) /* input */
-	);
-	return (int)(MOO_OOW_BITS - n - 1); 
-	/* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */
-#else
-	int pos = 0;
-	while (x >>= 1) pos++;
-	return pos;
-#endif
-}
-
-static MOO_INLINE int get_pos_of_msb_set (moo_oow_t x)
-{
-	/* x doesn't have to be power of 2. if x is zero, the result is undefined */
-#if defined(MOO_HAVE_BUILTIN_CLZLL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG_LONG)
-	return MOO_OOW_BITS - __builtin_clzll(x) - 1; /* count the number of leading zeros */
-#elif defined(MOO_HAVE_BUILTIN_CLZL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG)
-	return MOO_OOW_BITS - __builtin_clzl(x) - 1; /* count the number of leading zeros */
-#elif defined(MOO_HAVE_BUILTIN_CLZ) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_INT)
-	return MOO_OOW_BITS - __builtin_clz(x) - 1; /* count the number of leading zeros */
-#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
-	/* bit scan reverse. not all x86 CPUs have LZCNT. */
-	moo_oow_t pos;
-	__asm__ volatile (
-		"bsr %1,%0\n\t"
-		: "=r"(pos) /* output */
-		: "r"(x) /* input */
-	);
-	return (int)pos;
-#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 5))
-	moo_oow_t n;
-	__asm__ volatile (
-		"clz %0,%1\n\t"
-		: "=r"(n) /* output */
-		: "r"(x) /* input */
-	);
-	return (int)(MOO_OOW_BITS - n - 1); 
-	/* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */
-#else
-	int pos = 0;
-	while (x >>= 1) pos++;
-	return pos;
-#endif
-}
-
 #if defined(MOO_HAVE_UINT32_T) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_UINT32_T)
 #	define LOG2_FOR_POW2(x) LOG2_FOR_POW2_32(x)
 #elif defined(MOO_HAVE_UINT64_T) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_UINT64_T)
 #	define LOG2_FOR_POW2(x) LOG2_FOR_POW2_64(x)
 #else
-#	define LOG2_FOR_POW2(x) get_pos_of_msb_set_pow2(x)
+#	define LOG2_FOR_POW2(x) moo_get_pos_of_msb_set_pow2(x)
 #endif

-
 #if (MOO_SIZEOF_OOW_T == MOO_SIZEOF_INT) && defined(MOO_HAVE_BUILTIN_UADD_OVERFLOW)
 #	define oow_add_overflow(a,b,c)  __builtin_uadd_overflow(a,b,c)
 #elif (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG) && defined(MOO_HAVE_BUILTIN_UADDL_OVERFLOW)
@ -1924,9 +1842,9 @@ static void divide_unsigned_array3 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs
 #endif

 	y1 = y[ys - 1];
-	/*s = MOO_LIW_BITS - ((y1 == 0)? -1: get_pos_of_msb_set(y1)) - 1;*/
+	/*s = MOO_LIW_BITS - ((y1 == 0)? -1: moo_get_pos_of_msb_set(y1)) - 1;*/
 	MOO_ASSERT (moo, y1 > 0); /* the highest word can't be non-zero in the context where this function is called */
-	s = MOO_LIW_BITS - get_pos_of_msb_set(y1) - 1;
+	s = MOO_LIW_BITS - moo_get_pos_of_msb_set(y1) - 1;
 	for (i = ys; i > 1; )
 	{
 		--i;
--- a/moo/lib/fmtout.c
+++ b/moo/lib/fmtout.c
@ -42,7 +42,7 @@ floting-point conversion implementation*/

 /* Max number conversion buffer length: 
 * moo_intmax_t in base 2, plus NUL byte. */
-#define MAXNBUF (MOO_SIZEOF(moo_intmax_t) * 8 + 1)
+#define MAXNBUF (MOO_SIZEOF(moo_intmax_t) * MOO_BITS_PER_BYTE + 1)

 enum
 {
--- a/moo/lib/fmtoutv.h
+++ b/moo/lib/fmtoutv.h
@ -999,7 +999,7 @@ static int fmtoutv (moo_t* moo, const fmtchar_t* fmt, moo_fmtout_data_t* data, v
 				#else
 					register int shift = i * MOO_SIZEOF(moo_oow_t);
 					moo_oow_t x = va_arg (ap, moo_oow_t);
-					num |= (moo_uintmax_t)x << (shift * 8);
+					num |= (moo_uintmax_t)x << (shift * MOO_BITS_PER_BYTE);
 				#endif
 				}
 			#else
@ -1043,7 +1043,7 @@ static int fmtoutv (moo_t* moo, const fmtchar_t* fmt, moo_fmtout_data_t* data, v
 				#else
 					register int shift = i * MOO_SIZEOF(moo_oow_t);
 					moo_oow_t x = va_arg (ap, moo_oow_t);
-					num |= (moo_uintmax_t)x << (shift * 8);
+					num |= (moo_uintmax_t)x << (shift * MOO_BITS_PER_BYTE);
 				#endif
 				}
 			#else
--- a/moo/lib/moo-cmn.h
+++ b/moo/lib/moo-cmn.h
@ -71,6 +71,8 @@
 #		define __ARM_ARCH 6
 #	elif defined(__ARM_ARCH_5__)
 #		define __ARM_ARCH 5
+#	elif defined(__ARM_ARCH_4__)
+#		define __ARM_ARCH 4
 #	endif
 #endif

@ -334,6 +336,13 @@
 #	error UNKNOWN INTMAX SIZE
 #endif

+/* =========================================================================
+ * BASIC HARD-CODED DEFINES
+ * ========================================================================= */
+#define MOO_BITS_PER_BYTE (8)
+/* the maximum number of bch characters to represent a single uch character */
+#define MOO_BCSIZE_MAX 6
+
 /* =========================================================================
 * BASIC MOO TYPES
 * ========================================================================= */
@ -373,11 +382,15 @@ typedef moo_uintptr_t           moo_oow_t;
 typedef moo_intptr_t            moo_ooi_t;
 #define MOO_SIZEOF_OOW_T MOO_SIZEOF_UINTPTR_T
 #define MOO_SIZEOF_OOI_T MOO_SIZEOF_INTPTR_T
+#define MOO_OOW_BITS  (MOO_SIZEOF_OOW_T * MOO_BITS_PER_BYTE)
+#define MOO_OOI_BITS  (MOO_SIZEOF_OOI_T * MOO_BITS_PER_BYTE)

 typedef moo_ushortptr_t         moo_oohw_t; /* half word - half word */
 typedef moo_shortptr_t          moo_oohi_t; /* signed half word */
 #define MOO_SIZEOF_OOHW_T MOO_SIZEOF_USHORTPTR_T
 #define MOO_SIZEOF_OOHI_T MOO_SIZEOF_SHORTPTR_T
+#define MOO_OOHW_BITS  (MOO_SIZEOF_OOHW_T * MOO_BITS_PER_BYTE)
+#define MOO_OOHI_BITS  (MOO_SIZEOF_OOHI_T * MOO_BITS_PER_BYTE)

 struct moo_ucs_t
 {
@ -409,14 +422,9 @@ typedef struct moo_bcs_t moo_bcs_t;
 #	define MOO_SIZEOF_OOCH_T MOO_SIZEOF_BCH_T
 #endif

-/* the maximum number of bch charaters to represent a single uch character */
-#define MOO_BCSIZE_MAX 6
-
-
 /* =========================================================================
 * BASIC OOP ENCODING
 * ========================================================================= */
-
 /* actual structure defined in moo.h */
 typedef struct moo_obj_t           moo_obj_t;
 typedef struct moo_obj_t*          moo_oop_t;
@ -637,11 +645,11 @@ struct moo_ntime_t

 /* make a bit mask that can mask off low n bits */
 #define MOO_LBMASK(type,n) (~(~((type)0) << (n))) 
-#define MOO_LBMASK_SAFE(type,n) (((n) < MOO_SIZEOF(type) * 8)? MOO_LBMASK(type,n): ~(type)0)
+#define MOO_LBMASK_SAFE(type,n) (((n) < MOO_SIZEOF(type) * MOO_BITS_PER_BYTE)? MOO_LBMASK(type,n): ~(type)0)

 /* make a bit mask that can mask off hig n bits */
 #define MOO_HBMASK(type,n) (~(~((type)0) >> (n)))
-#define MOO_HBMASK_SAFE(type,n) (((n) < MOO_SIZEOF(type) * 8)? MOO_HBMASK(type,n): ~(type)0)
+#define MOO_HBMASK_SAFE(type,n) (((n) < MOO_SIZEOF(type) * MOO_BITS_PER_BYTE)? MOO_HBMASK(type,n): ~(type)0)

 /* get 'length' bits starting from the bit at the 'offset' */
 #define MOO_GETBITS(type,value,offset,length) \
@ -668,7 +676,7 @@ struct moo_ntime_t
 * \endcode
 */
 /*#define MOO_BITS_MAX(type,nbits) ((((type)1) << (nbits)) - 1)*/
-#define MOO_BITS_MAX(type,nbits) ((~(type)0) >> (MOO_SIZEOF(type) * 8 - (nbits)))
+#define MOO_BITS_MAX(type,nbits) ((~(type)0) >> (MOO_SIZEOF(type) * MOO_BITS_PER_BYTE - (nbits)))

 /* =========================================================================
 * MMGR
@ -819,11 +827,11 @@ typedef struct moo_t moo_t;
 #define MOO_TYPE_IS_UNSIGNED(type) (((type)0) < ((type)-1))

 #define MOO_TYPE_SIGNED_MAX(type) \
-	((type)~((type)1 << ((type)MOO_SIZEOF(type) * 8 - 1)))
+	((type)~((type)1 << ((type)MOO_SIZEOF(type) * MOO_BITS_PER_BYTE - 1)))
 #define MOO_TYPE_UNSIGNED_MAX(type) ((type)(~(type)0))

 #define MOO_TYPE_SIGNED_MIN(type) \
-	((type)((type)1 << ((type)MOO_SIZEOF(type) * 8 - 1)))
+	((type)((type)1 << ((type)MOO_SIZEOF(type) * MOO_BITS_PER_BYTE - 1)))
 #define MOO_TYPE_UNSIGNED_MIN(type) ((type)0)

 #define MOO_TYPE_MAX(type) \
--- a/moo/lib/moo-prv.h
+++ b/moo/lib/moo-prv.h
@ -250,10 +250,10 @@
 *   2. the maximum number of bit shifts can be represented in the moo_oow_t type.
 */
 #	define MOO_OBJ_SIZE_MAX ((moo_oow_t)MOO_SMOOI_MAX)
-#	define MOO_OBJ_SIZE_BITS_MAX (MOO_OBJ_SIZE_MAX * 8)
+#	define MOO_OBJ_SIZE_BITS_MAX (MOO_OBJ_SIZE_MAX * MOO_BITS_PER_BYTE)
 #else
 #	define MOO_OBJ_SIZE_MAX ((moo_oow_t)MOO_TYPE_MAX(moo_oow_t))
-#	define MOO_OBJ_SIZE_BITS_MAX (MOO_OBJ_SIZE_MAX * 8)
+#	define MOO_OBJ_SIZE_BITS_MAX (MOO_OBJ_SIZE_MAX * MOO_BITS_PER_BYTE)
 #endif


--- a/moo/lib/moo-utl.h
+++ b/moo/lib/moo-utl.h
@ -736,8 +736,9 @@ MOO_EXPORT moo_oow_t moo_utf16_to_uc (
 	moo_uch_t*       uc
 );

-/* ------------------------------------------------------------------------- */
-
+/* =========================================================================
+ * BIT SWAP
+ * ========================================================================= */
 #if defined(MOO_HAVE_INLINE)

 #if defined(MOO_HAVE_UINT16_T)
@ -748,8 +749,9 @@ static MOO_INLINE moo_uint16_t moo_bswap16 (moo_uint16_t x)
 #elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
 	__asm__ /*volatile*/ ("xchgb %b0, %h0" : "=Q"(x): "0"(x));
 	return x;
-#elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
+#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
 	__asm__ /*volatile*/ ("rev16 %0, %0" : "+r"(x));
+	return x;
 #else
 	return (x << 8) | (x >> 8);
 #endif
@ -764,8 +766,12 @@ static MOO_INLINE moo_uint32_t moo_bswap32 (moo_uint32_t x)
 #elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
 	__asm__ /*volatile*/ ("bswapl %0" : "=r"(x) : "0"(x));
 	return x;
-#elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
+#elif defined(__GNUC__) && defined(__aarch64__)
 	__asm__ /*volatile*/ ("rev32 %0, %0" : "+r"(x));
+	return x;
+#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 6))
+	__asm__ /*volatile*/ ("rev %0, %0" : "+r"(x));
+	return x;
 #elif defined(__GNUC__) && defined(__ARM_ARCH)
 	moo_uint32_t tmp;
 	__asm__ /*volatile*/ (
@ -794,7 +800,8 @@ static MOO_INLINE moo_uint64_t moo_bswap64 (moo_uint64_t x)
 	__asm__ /*volatile*/ ("bswapq %0" : "=r"(x) : "0"(x));
 	return x;
 #elif defined(__GNUC__) && defined(__aarch64__)
-	__asm__ /*volatile*/ ("rev64 %0, %0" : "+r"(x));
+	__asm__ /*volatile*/ ("rev %0, %0" : "+r"(x));
+	return x;
 #else
 	return ((x >> 56)) | 
 	       ((x >> 40) & ((moo_uint64_t)0xff << 8)) | 
@ -837,17 +844,28 @@ static MOO_INLINE moo_uint128_t moo_bswap128 (moo_uint128_t x)
 #else

 #if defined(MOO_HAVE_UINT16_T)
+#	if defined(MOO_HAVE_BUILTIN_BSWAP16)
+#	define moo_bswap16(x) ((moo_uint16_t)__builtin_bswap16((moo_uint16_t)(x)))
+#	else 
 #	define moo_bswap16(x) ((moo_uint16_t)(((moo_uint16_t)(x)) << 8) | (((moo_uint16_t)(x)) >> 8))
+#	endif
 #endif

 #if defined(MOO_HAVE_UINT32_T)
+#	if defined(MOO_HAVE_BUILTIN_BSWAP32)
+#	define moo_bswap32(x) ((moo_uint32_t)__builtin_bswap32((moo_uint32_t)(x)))
+#	else 
 #	define moo_bswap32(x) ((moo_uint32_t)(((((moo_uint32_t)(x)) >> 24)) | \
 	                                      ((((moo_uint32_t)(x)) >>  8) & ((moo_uint32_t)0xff << 8)) | \
 	                                      ((((moo_uint32_t)(x)) <<  8) & ((moo_uint32_t)0xff << 16)) | \
 	                                      ((((moo_uint32_t)(x)) << 24))))
+#	endif
 #endif

 #if defined(MOO_HAVE_UINT64_T)
+#	if defined(MOO_HAVE_BUILTIN_BSWAP64)
+#	define moo_bswap64(x) ((moo_uint64_t)__builtin_bswap64((moo_uint64_t)(x)))
+#	else 
 #	define moo_bswap64(x) ((moo_uint64_t)(((((moo_uint64_t)(x)) >> 56)) | \
 	                                      ((((moo_uint64_t)(x)) >> 40) & ((moo_uint64_t)0xff << 8)) | \
 	                                      ((((moo_uint64_t)(x)) >> 24) & ((moo_uint64_t)0xff << 16)) | \
@ -856,9 +874,13 @@ static MOO_INLINE moo_uint128_t moo_bswap128 (moo_uint128_t x)
 	                                      ((((moo_uint64_t)(x)) << 24) & ((moo_uint64_t)0xff << 40)) | \
 	                                      ((((moo_uint64_t)(x)) << 40) & ((moo_uint64_t)0xff << 48)) | \
 	                                      ((((moo_uint64_t)(x)) << 56))))
+#	endif
 #endif

 #if defined(MOO_HAVE_UINT128_T)
+#	if defined(MOO_HAVE_BUILTIN_BSWAP128)
+#	define moo_bswap128(x) ((moo_uint128_t)__builtin_bswap128((moo_uint128_t)(x)))
+#	else 
 #	define moo_bswap128(x) ((moo_uint128_t)(((((moo_uint128_t)(x)) >> 120)) |  \
 	                                        ((((moo_uint128_t)(x)) >> 104) & ((moo_uint128_t)0xff << 8)) | \
 	                                        ((((moo_uint128_t)(x)) >>  88) & ((moo_uint128_t)0xff << 16)) | \
@ -875,6 +897,7 @@ static MOO_INLINE moo_uint128_t moo_bswap128 (moo_uint128_t x)
 	                                        ((((moo_uint128_t)(x)) <<  88) & ((moo_uint128_t)0xff << 104)) | \
 	                                        ((((moo_uint128_t)(x)) << 104) & ((moo_uint128_t)0xff << 112)) | \
 	                                        ((((moo_uint128_t)(x)) << 120))))
+#	endif
 #endif

 #endif /* MOO_HAVE_INLINE */
@ -961,9 +984,92 @@ static MOO_INLINE moo_uint128_t moo_bswap128 (moo_uint128_t x)
 #	error UNKNOWN ENDIAN
 #endif

+/* =========================================================================
+ * BIT POSITION
+ * ========================================================================= */
+static MOO_INLINE int moo_get_pos_of_msb_set_pow2 (moo_oow_t x)
+{
+	/* returns the zero-based index of the single set bit in x. x must be a power
+	 * of 2; for x == 0 the result is undefined and differs between the branches below. */
+#if defined(MOO_HAVE_BUILTIN_CTZLL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG_LONG)
+	return __builtin_ctzll(x); /* count the number of trailing zeros */
+#elif defined(MOO_HAVE_BUILTIN_CTZL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG)
+	return __builtin_ctzl(x); /* count the number of trailing zeros */
+#elif defined(MOO_HAVE_BUILTIN_CTZ) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_INT)
+	return __builtin_ctz(x); /* count the number of trailing zeros */
+#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
+	moo_oow_t pos;
+	/* use the Bit Scan Forward instruction */
+#if 1
+	__asm__ volatile (
+		"bsf %1,%0\n\t"
+		: "=r"(pos) /* output */
+		: "r"(x) /* input */
+	);
+#else
+	__asm__ volatile (
+		"bsf %[X],%[EXP]\n\t"
+		: [EXP]"=r"(pos) /* output */
+		: [X]"r"(x) /* input */
+	);
+#endif
+	return (int)pos;
+#elif defined(__GNUC__) && (defined(__aarch64__) || (defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 5))))
+	moo_oow_t n;
+	/* CLZ is available in ARMv5T and above. there is no instruction to
+	 * count trailing zeros or something similar. using RBIT with CLZ
+	 * would be good in ARMv6T2 and above to avoid further calculation
+	 * after CLZ */
+	__asm__ volatile (
+		"clz %0,%1\n\t"
+		: "=r"(n) /* output */
+		: "r"(x) /* input */
+	);
+	return (int)(MOO_OOW_BITS - n - 1); 
+	/* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */
+#else
+	int pos = 0;
+	while (x >>= 1) pos++;
+	return pos;
+#endif
+}
+
+static MOO_INLINE int moo_get_pos_of_msb_set (moo_oow_t x)
+{
+	/* returns the zero-based index of the highest set bit in x. x doesn't have to be power of 2. if x is zero, the result is undefined */
+#if defined(MOO_HAVE_BUILTIN_CLZLL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG_LONG)
+	return MOO_OOW_BITS - __builtin_clzll(x) - 1; /* count the number of leading zeros */
+#elif defined(MOO_HAVE_BUILTIN_CLZL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG)
+	return MOO_OOW_BITS - __builtin_clzl(x) - 1; /* count the number of leading zeros */
+#elif defined(MOO_HAVE_BUILTIN_CLZ) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_INT)
+	return MOO_OOW_BITS - __builtin_clz(x) - 1; /* count the number of leading zeros */
+#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386))
+	/* bit scan reverse. not all x86 CPUs have LZCNT. */
+	moo_oow_t pos;
+	__asm__ volatile (
+		"bsr %1,%0\n\t"
+		: "=r"(pos) /* output */
+		: "r"(x) /* input */
+	);
+	return (int)pos;
+#elif defined(__GNUC__) && (defined(__aarch64__) || (defined(__arm__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 5))))
+	moo_oow_t n;
+	__asm__ volatile (
+		"clz %0,%1\n\t"
+		: "=r"(n) /* output */
+		: "r"(x) /* input */
+	);
+	return (int)(MOO_OOW_BITS - n - 1); 
+	/* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */
+#else
+	int pos = 0;
+	while (x >>= 1) pos++;
+	return pos;
+#endif
+}
+
 #if defined(__cplusplus)
 }
 #endif

-
 #endif
--- a/moo/lib/moo.h
+++ b/moo/lib/moo.h
@ -153,11 +153,7 @@ typedef struct moo_obj_byte_t*     moo_oop_byte_t;
 typedef struct moo_obj_halfword_t* moo_oop_halfword_t;
 typedef struct moo_obj_word_t*     moo_oop_word_t;

-#define MOO_OOW_BITS  (MOO_SIZEOF_OOW_T * 8)
-#define MOO_OOI_BITS  (MOO_SIZEOF_OOI_T * 8)
-#define MOO_OOP_BITS  (MOO_SIZEOF_OOP_T * 8)
-#define MOO_OOHW_BITS (MOO_SIZEOF_OOHW_T * 8)
-
+#define MOO_OOP_BITS  (MOO_SIZEOF_OOP_T * MOO_BITS_PER_BYTE)

 /* =========================================================================
 * BIGINT TYPES AND MACROS
@ -174,7 +170,7 @@ typedef struct moo_obj_word_t*     moo_oop_word_t;
 #	define MOO_SIZEOF_LIW_T    MOO_SIZEOF_OOW_T
 #	define MOO_SIZEOF_LIDW_T   MOO_SIZEOF_UINTMAX_T
 #	define MOO_LIW_BITS        MOO_OOW_BITS
-#	define MOO_LIDW_BITS       (MOO_SIZEOF_UINTMAX_T * 8) 
+#	define MOO_LIDW_BITS       (MOO_SIZEOF_UINTMAX_T * MOO_BITS_PER_BYTE) 

 	typedef moo_oop_word_t     moo_oop_liword_t;
 #	define MOO_OBJ_TYPE_LIWORD MOO_OBJ_TYPE_WORD
--- a/moo/lib/pf-sys.c
+++ b/moo/lib/pf-sys.c
@ -256,7 +256,7 @@ static MOO_INLINE int _store_raw_int (moo_t* moo, moo_uint8_t* rawptr, moo_oow_t
 	}

 	/* assume 2's complement */
-	max = (moo_ooi_t)(~(moo_oow_t)0 >> ((MOO_SIZEOF_OOW_T - size) * 8  + 1));
+	max = (moo_ooi_t)(~(moo_oow_t)0 >> ((MOO_SIZEOF_OOW_T - size) * MOO_BITS_PER_BYTE  + 1));
 	min = -max - 1;

 	if (w > max || w < min) 
@ -314,7 +314,7 @@ static MOO_INLINE int _store_raw_uint (moo_t* moo, moo_uint8_t* rawptr, moo_oow_
 		return -1;
 	}

-	max = (~(moo_oow_t)0 >> ((MOO_SIZEOF_OOW_T - size) * 8));
+	max = (~(moo_oow_t)0 >> ((MOO_SIZEOF_OOW_T - size) * MOO_BITS_PER_BYTE));
 	if (w > max) 
 	{
 		moo_seterrbfmt (moo, MOO_ERANGE, "value %ju out of supported range for raw unsigned memory store",  w);
--- a/moo/lib/utl.c
+++ b/moo/lib/utl.c
@ -404,7 +404,7 @@ moo_bch_t* moo_find_bchar_in_bcstr (const moo_bch_t* ptr, moo_bch_t c)

 moo_oow_t moo_byte_to_bcstr (moo_uint8_t byte, moo_bch_t* buf, moo_oow_t size, int flagged_radix, moo_bch_t fill)
 {
-	moo_bch_t tmp[(MOO_SIZEOF(moo_uint8_t) * 8)];
+	moo_bch_t tmp[(MOO_SIZEOF(moo_uint8_t) * MOO_BITS_PER_BYTE)];
 	moo_bch_t* p = tmp, * bp = buf, * be = buf + size - 1;
 	int radix;
 	moo_bch_t radix_char;
--- a/moo/t/Makefile.am
+++ b/moo/t/Makefile.am
@ -16,5 +16,6 @@ if WIN32
 LDADD += $(SOCKET_LIBS)
 endif

-bin_PROGRAMS = t-001
+bin_PROGRAMS = t-001 t-002
 t_001_SOURCES = t-001.c
+t_002_SOURCES = t-002.c
--- a/moo/t/Makefile.in
+++ b/moo/t/Makefile.in
@ -89,7 +89,7 @@ POST_UNINSTALL = :
 build_triplet = @build@
 host_triplet = @host@
@WIN32_TRUE@am__append_1 = $(SOCKET_LIBS)
-bin_PROGRAMS = t-001$(EXEEXT)
+bin_PROGRAMS = t-001$(EXEEXT) t-002$(EXEEXT)
 subdir = t
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_sign.m4 \
@ -110,9 +110,13 @@ PROGRAMS = $(bin_PROGRAMS)
 am_t_001_OBJECTS = t-001.$(OBJEXT)
 t_001_OBJECTS = $(am_t_001_OBJECTS)
 t_001_LDADD = $(LDADD)
+am_t_002_OBJECTS = t-002.$(OBJEXT)
+t_002_OBJECTS = $(am_t_002_OBJECTS)
+t_002_LDADD = $(LDADD)
 am__DEPENDENCIES_1 =
@WIN32_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1)
 t_001_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2)
+t_002_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2)
 AM_V_lt = $(am__v_lt_@AM_V@)
 am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
 am__v_lt_0 = --silent
@ -151,8 +155,8 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@)
 am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
 am__v_CCLD_0 = @echo "  CCLD    " $@;
 am__v_CCLD_1 = 
-SOURCES = $(t_001_SOURCES)
-DIST_SOURCES = $(t_001_SOURCES)
+SOURCES = $(t_001_SOURCES) $(t_002_SOURCES)
+DIST_SOURCES = $(t_001_SOURCES) $(t_002_SOURCES)
 am__can_run_installinfo = \
  case $$AM_UPDATE_INFO_DIR in \
    n|no|NO) false;; \
@ -344,6 +348,7 @@ AM_CPPFLAGS = \
 AM_LDFLAGS = -L$(abs_builddir)/../lib  -L$(libdir)
 LDADD = -lmoo $(PTHREAD_LIBS) $(am__append_1)
 t_001_SOURCES = t-001.c
+t_002_SOURCES = t-002.c
 all: all-am

 .SUFFIXES:
@ -431,6 +436,10 @@ t-001$(EXEEXT): $(t_001_OBJECTS) $(t_001_DEPENDENCIES) $(EXTRA_t_001_DEPENDENCIE
 	@rm -f t-001$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(t_001_OBJECTS) $(t_001_LDADD) $(LIBS)

+t-002$(EXEEXT): $(t_002_OBJECTS) $(t_002_DEPENDENCIES) $(EXTRA_t_002_DEPENDENCIES) 
+	@rm -f t-002$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(t_002_OBJECTS) $(t_002_LDADD) $(LIBS)
+
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)

@ -438,6 +447,7 @@ distclean-compile:
 	-rm -f *.tab.c

@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t-001.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/t-002.Po@am__quote@

 .c.o:
@am__fastdepCC_TRUE@	$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
--- a/moo/t/t-002.c
+++ b/moo/t/t-002.c
@ -0,0 +1,35 @@
+/* test bit position functions */
+
+#include <moo-utl.h>
+#include <stdio.h>
+#include "t.h"
+
+int main ()
+{
+	int i, j;
+	moo_oow_t v;
+
+	printf ("MOO_OOW_BITS => %d, sizeof(moo_oow_t)=%d\n", (int)MOO_OOW_BITS, (int)sizeof(moo_oow_t));
+	for (i = 0; i < MOO_OOW_BITS; i++) /* single-bit values: the pow2 variant must report bit i */
+	{
+		v = ((moo_oow_t)1 << i);
+		j = moo_get_pos_of_msb_set_pow2(v);
+		printf ("msb(pow2) %d %d ==> %llx\n", i, j, (unsigned long long int)v);
+		T_ASSERT1 (i == j, "msb(pow2) position tester");
+	}
+
+	for (i = 0; i < MOO_OOW_BITS; i++) /* bit i plus bit 0: the general variant must still report bit i */
+	{
+		v = ((moo_oow_t)1 << i);
+		v |= 1;
+		j = moo_get_pos_of_msb_set(v);
+		printf ("msb %d %d ==> %llx\n", i, j, (unsigned long long int)v);
+		T_ASSERT1 (i == j, "msb position tester");
+	}
+
+
+	return 0;
+
+oops: /* NOTE(review): presumably the failure target jumped to by T_ASSERT1 (defined in t.h, not visible here) */
+	return -1;
+}