-
-
Save pps83/3210a2f980fd02bb2ba2e5a1fc4a2ef0 to your computer and use it in GitHub Desktop.
| // Note, bsf/bsr are used by default. | |
| // Enable /arch:AVX2 compilation for better optimizations | |
| #if defined(_MSC_VER) && !defined(__clang__) | |
| #include <intrin.h> | |
| #include <limits.h> | |
| #if (defined(__cplusplus) && (__cplusplus >= 202002L)) || \ | |
| (defined(_MSVC_LANG) && (_MSVC_LANG >= 202002L)) | |
| #include <type_traits> | |
| #define CONSTEVAL_ (std::is_constant_evaluated()) | |
| #define CONSTEXPR_ constexpr | |
| #else | |
| #define CONSTEXPR_ | |
| #endif | |
| static CONSTEXPR_ __forceinline int __builtin_ctz(unsigned x) | |
| { | |
| #ifdef CONSTEVAL_ | |
| if CONSTEVAL_ | |
| { | |
| for (int i = 0; i < sizeof(x) * CHAR_BIT; ++i) | |
| { | |
| if ((x >> i) & 1) | |
| return i; | |
| } | |
| return sizeof(x) * CHAR_BIT; | |
| } | |
| #endif | |
| #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) | |
| return (int)_CountTrailingZeros(x); | |
| #elif defined(__AVX2__) || defined(__BMI__) | |
| return (int)_tzcnt_u32(x); | |
| #else | |
| unsigned long r; | |
| _BitScanForward(&r, x); | |
| return (int)r; | |
| #endif | |
| } | |
| static CONSTEXPR_ __forceinline int __builtin_ctzll(unsigned long long x) | |
| { | |
| #ifdef CONSTEVAL_ | |
| if CONSTEVAL_ | |
| { | |
| for (int i = 0; i < sizeof(x) * CHAR_BIT; ++i) | |
| { | |
| if ((x >> i) & 1) | |
| return i; | |
| } | |
| return sizeof(x) * CHAR_BIT; | |
| } | |
| #endif | |
| #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) | |
| return (int)_CountTrailingZeros64(x); | |
| #elif defined(_WIN64) | |
| #if defined(__AVX2__) || defined(__BMI__) | |
| return (int)_tzcnt_u64(x); | |
| #else | |
| unsigned long r; | |
| _BitScanForward64(&r, x); | |
| return (int)r; | |
| #endif | |
| #else | |
| int l = __builtin_ctz((unsigned)x); | |
| int h = __builtin_ctz((unsigned)(x >> 32)) + 32; | |
| return !!((unsigned)x) ? l : h; | |
| #endif | |
| } | |
| static CONSTEXPR_ __forceinline int __builtin_ctzl(unsigned long x) | |
| { | |
| return sizeof(x) == 8 ? __builtin_ctzll(x) : __builtin_ctz((unsigned)x); | |
| } | |
| static CONSTEXPR_ __forceinline int __builtin_clz(unsigned x) | |
| { | |
| #ifdef CONSTEVAL_ | |
| if CONSTEVAL_ | |
| { | |
| for (int i = 0; i < sizeof(x) * CHAR_BIT; ++i) | |
| { | |
| if (x >> (sizeof(x) * CHAR_BIT - 1 - i)) | |
| return i; | |
| } | |
| return sizeof(x) * CHAR_BIT; | |
| } | |
| #endif | |
| #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) | |
| return (int)_CountLeadingZeros(x); | |
| #elif defined(__AVX2__) || defined(__LZCNT__) | |
| return (int)_lzcnt_u32(x); | |
| #else | |
| unsigned long r; | |
| _BitScanReverse(&r, x); | |
| return (int)(r ^ 31); | |
| #endif | |
| } | |
| static CONSTEXPR_ __forceinline int __builtin_clzll(unsigned long long x) | |
| { | |
| #ifdef CONSTEVAL_ | |
| if CONSTEVAL_ | |
| { | |
| for (int i = 0; i < sizeof(x) * CHAR_BIT; ++i) | |
| { | |
| if (x >> (sizeof(x) * CHAR_BIT - 1 - i)) | |
| return i; | |
| } | |
| return sizeof(x) * CHAR_BIT; | |
| } | |
| #endif | |
| #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) | |
| return (int)_CountLeadingZeros64(x); | |
| #elif defined(_WIN64) | |
| #if defined(__AVX2__) || defined(__LZCNT__) | |
| return (int)_lzcnt_u64(x); | |
| #else | |
| unsigned long r; | |
| _BitScanReverse64(&r, x); | |
| return (int)(r ^ 63); | |
| #endif | |
| #else | |
| int l = __builtin_clz((unsigned)x) + 32; | |
| int h = __builtin_clz((unsigned)(x >> 32)); | |
| return !!((unsigned)(x >> 32)) ? h : l; | |
| #endif | |
| } | |
| static CONSTEXPR_ __forceinline int __builtin_clzl(unsigned long x) | |
| { | |
| return sizeof(x) == 8 ? __builtin_clzll(x) : __builtin_clz((unsigned)x); | |
| } | |
| #undef CONSTEVAL_ | |
| #undef CONSTEXPR_ | |
| #endif // defined(_MSC_VER) && !defined(__clang__) |
Hi Pavel,
This is really useful! I'd like to include this as a support file in my C container library.
https://github.com/nickpelling/C_STD
Is there a copyright or licence notice you'd like this published under? I use the MIT license because it's most permissive (I think):
https://github.com/nickpelling/C_STD?tab=MIT-1-ov-file
Hi Pavel,
This is really useful! I'd like to include this as a support file in my C container library. https://github.com/nickpelling/C_STD
Is there a copyright or licence notice you'd like this published under? I use the MIT license because it's most permissive (I think): https://github.com/nickpelling/C_STD?tab=MIT-1-ov-file
OK to take it as is. Hopefully it's useful for users of C_STD.
Note, that the code uses _tzcnt_u32, _tzcnt_u64, _lzcnt_u32 _lzcnt_u64. AFAIK, these are BMI (or BMI2), so, it might be a good idea to use bsf variants instead for generic targets.
Thanks very much, Pavel, that's perfect, you're a star! :-)
Thanks very much, Pavel, that's perfect, you're a star! :-)
Note, I updated it to:
- check for
__AVX2__builds to use lzcnt/tzcnt vs bsf/bsr - add code to handle arm/arm64
- add guard for clang-cl
The code was tested to verify that it's bit exact with the code that clang/gcc emits for these builtins.
@pps83 thanks see my updates too, still digging up between targets ; that's funny or not, this simple thing is @$##^&&% to get right ; just for getting a
bit_width, all I want that's a portablebit_widthoperator jeez ; soft emulation takes no ROM ; maybe with a small lookup table I could shrink down the number of ops... whatever just want bit_width, can I get that easy... it seems not. ICC have _lzcnt_u64, _lzcnt_u32 and IBM XL __cntlz4, __cntlz8, but they have now a clang front-end so ; clang forever. ```update ARMCC has __rbit() and __clz() however that's a clang fork so... just logging for posterity.For a soft emulation fallback (according to my CPU benchmarks) the following is the best
player: