Created
July 9, 2012 21:13
-
-
Save zchothia/3078968 to your computer and use it in GitHub Desktop.
Revisions
-
zchothia created this gist
Jul 9, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,258 @@ // AVX CPU dispatching - based on Agner Fog's C++ vector class library: // http://www.agner.org/optimize/vectorclass.zip #include <stdio.h> #include <stdbool.h> //------------------------------------------------------------------------------ //>> BEGIN <instrset.h> // Detect 64 bit mode #if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) ) && ! defined(__x86_64__) # define __x86_64__ 1 // There are many different macros for this, decide on only one #endif // Find instruction set from compiler macros if INSTRSET not defined // Note: Not all compilers define these macros automatically #ifndef INSTRSET #if defined(__AVX2__) # define INSTRSET 8 #elif defined(__AVX__) # define INSTRSET 7 #elif defined(__SSE4_2__) # define INSTRSET 6 #elif defined(__SSE4_1__) # define INSTRSET 5 #elif defined(__SSSE3__) # define INSTRSET 4 #elif defined(__SSE3__) # define INSTRSET 3 #elif defined(__SSE2__) || defined(__x86_64__) # define INSTRSET 2 #elif defined(__SSE__) # define INSTRSET 1 #elif defined(_M_IX86_FP) // Defined in MS compiler. 1: SSE, 2: SSE2 # define INSTRSET _M_IX86_FP #else # define INSTRSET 0 #endif // instruction set defines #endif // INSTRSET // Include the appropriate header file for intrinsic functions #if INSTRSET > 7 // AVX2 and later # ifdef __GNUC__ # include <x86intrin.h> // x86intrin.h includes header files for whatever instruction // sets are specified on the compiler command line, such as: // xopintrin.h, fma4intrin.h # else # include <immintrin.h> // MS version of immintrin.h covers AVX, AVX2 and FMA3 # endif // __GNUC__ #elif INSTRSET == 7 # include <immintrin.h> // AVX #elif INSTRSET == 6 # include <nmmintrin.h> // SSE4.2 #elif INSTRSET == 5 # include <smmintrin.h> // SSE4.1 #elif INSTRSET == 4 # include <tmmintrin.h> // SSSE3 #elif INSTRSET == 3 # include <pmmintrin.h> // SSE3 #elif INSTRSET == 2 # include <emmintrin.h> // SSE2 #elif INSTRSET == 1 # include <xmmintrin.h> // SSE #endif // INSTRSET // AMD instruction sets #ifdef __XOP__ # ifdef __GNUC__ # include <x86intrin.h> // AMD XOP (Gnu) # else # include <ammintrin.h> // AMD XOP (Microsoft) # endif // __GNUC__ #elif defined (__SSE4A__) // AMD SSE4A # include <ammintrin.h> #endif // __XOP__ // Define integer types with known size #if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1600) // Compilers supporting C99 or C++0x have stdint.h defining these integer types #include <stdint.h> #elif defined(_MSC_VER) // Older Microsoft compilers have their own definitions typedef signed __int8 int8_t; typedef unsigned __int8 uint8_t; typedef signed __int16 int16_t; typedef unsigned __int16 uint16_t; typedef signed __int32 int32_t; typedef unsigned __int32 uint32_t; typedef signed __int64 int64_t; typedef unsigned __int64 uint64_t; #ifndef _INTPTR_T_DEFINED #define _INTPTR_T_DEFINED #ifdef __x86_64__ typedef int64_t intptr_t; #else typedef int32_t intptr_t; #endif #endif #else // This works with most compilers typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int int16_t; typedef unsigned short int uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; #ifdef __x86_64__ typedef int64_t intptr_t; #else typedef int32_t intptr_t; #endif #endif #include <stdlib.h> // define abs(int) #ifdef _MSC_VER // Microsoft compiler or compatible Intel compiler #include <intrin.h> // define _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int) #endif // _MSC_VER // functions in instrset_detect.cpp int instrset_detect(void); // tells which instruction sets are supported bool hasFMA3(void); // true if FMA3 instructions supported bool hasFMA4(void); // true if FMA4 instructions supported bool hasXOP (void); // true if XOP instructions supported // GCC version #if defined(__GNUC__) && ! defined (GCC_VERSION) #define GCC_VERSION ((__GNUC__) * 10000 + (__GNUC_MINOR__) * 100 + (__GNUC_PATCHLEVEL__)) #endif //<< END <instrset.h> //------------------------------------------------------------------------------ //>> BEGIN <instrset_detect.cpp> // Define interface to cpuid instruction. // input: eax = functionnumber, ecx = 0 // output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3] static inline void cpuid (int output[4], int functionnumber) { #if defined (_MSC_VER) || defined (__INTEL_COMPILER) // Microsoft or Intel compiler, intrin.h included __cpuidex(output, functionnumber, 0); // intrinsic function for CPUID #elif defined(__GNUC__) // use inline assembly, Gnu/AT&T syntax int a, b, c, d; __asm("cpuid" : "=a"(a),"=b"(b),"=c"(c),"=d"(d) : "a"(functionnumber),"c"(0) : ); output[0] = a; output[1] = b; output[2] = c; output[3] = d; #else // unknown platform. try inline assembly with masm/intel syntax __asm { mov eax, functionnumber xor ecx, ecx cpuid; mov esi, output mov [esi], eax mov [esi+4], ebx mov [esi+8], ecx mov [esi+12], edx } #endif } // Define interface to xgetbv instruction static inline int64_t xgetbv (int ctr) { #if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic return _xgetbv(ctr); // intrinsic function for XGETBV #elif defined(__GNUC__) // use inline assembly, Gnu/AT&T syntax uint32_t a, d; __asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : ); return a | (((uint64_t) d) << 32); #else // #elif defined (_WIN32) // other compiler. try inline assembly with masm/intel/MS syntax uint32_t a, d; __asm { mov ecx, ctr _emit 0x0f _emit 0x01 _emit 0xd0 ; // xgetbv mov a, eax mov d, edx } return a | (uint64_t(d) << 32); #endif } /* find supported instruction set return value: 0 = 80386 instruction set 1 or above = SSE (XMM) supported by CPU (not testing for O.S. support) 2 or above = SSE2 3 or above = SSE3 4 or above = Supplementary SSE3 (SSSE3) 5 or above = SSE4.1 6 or above = SSE4.2 7 or above = AVX supported by CPU and operating system 8 or above = AVX2 */ int instrset_detect(void) { static int iset = -1; // remember value for next call if (iset >= 0) { return iset; // called before } iset = 0; // default value int abcd[4] = {0,0,0,0}; // cpuid results cpuid(abcd, 0); // call cpuid function 0 if (abcd[0] == 0) return iset; // no further cpuid function supported cpuid(abcd, 1); // call cpuid function 1 for feature flags if ((abcd[3] & (1 << 0)) == 0) return iset; // no floating point if ((abcd[3] & (1 << 23)) == 0) return iset; // no MMX if ((abcd[3] & (1 << 15)) == 0) return iset; // no conditional move if ((abcd[3] & (1 << 24)) == 0) return iset; // no FXSAVE if ((abcd[3] & (1 << 25)) == 0) return iset; // no SSE iset = 1; // 1: SSE supported if ((abcd[3] & (1 << 26)) == 0) return iset; // no SSE2 iset = 2; // 2: SSE2 supported if ((abcd[2] & (1 << 0)) == 0) return iset; // no SSE3 iset = 3; // 3: SSE3 supported if ((abcd[2] & (1 << 9)) == 0) return iset; // no SSSE3 iset = 4; // 4: SSSE3 supported if ((abcd[2] & (1 << 19)) == 0) return iset; // no SSE4.1 iset = 5; // 5: SSE4.1 supported if ((abcd[2] & (1 << 23)) == 0) return iset; // no POPCNT if ((abcd[2] & (1 << 20)) == 0) return iset; // no SSE4.2 iset = 6; // 6: SSE4.2 supported if ((abcd[2] & (1 << 27)) == 0) return iset; // no OSXSAVE if ((xgetbv(0) & 6) != 6) return iset; // AVX not enabled in O.S. if ((abcd[2] & (1 << 28)) == 0) return iset; // no AVX iset = 7; // 7: AVX supported cpuid(abcd, 7); // call cpuid leaf 7 for feature flags if ((abcd[1] & (1 << 5)) == 0) return iset; // no AVX2 iset = 8; // 8: AVX2 supported return iset; } //<< END <instrset_detect.cpp> //------------------------------------------------------------------------------ int main() { int iset = instrset_detect(); bool hasAVX = iset >= 7; printf("Has AVX? %s (iset = %d)\n", hasAVX ? "Yes!" : "No.", iset); return 0; }