/* * Author: Nicu Tofan * License: BSD * * See below for getRealTime() license. */ #include #include #include #define test_m 2 #define test_k 3 #define test_n 4 #define test_m_extra 5 #define test_k_extra 7 #define test_n_extra 8 #define test_row_major 1 #define test_col_major 0 double getRealTime(); //! prints a matrix in either row major or column major order static void printMatrix (double *data, int rows, int cols, int row_major) { int i,j; // column header printf (" "); for(j=0; j= m // if TRANSA = 'T' then >= k B, // B source matrix test_k_extra, // true lenght of a column in B // if TRANSB = 'N' then >= k // if TRANSB = 'T' then >= n 0.0, // beta = 0.0 C, // C destination matrix test_n_extra); // true lenght of a row in C // must be >= m printMatrix (C, test_n_extra, test_n, test_col_major); } //! blas expects column major order static void testMulColMajor() { printf ("==========================================================\n"); printf ("Test BLAS multiply - column major order\n"); printf ("==========================================================\n\n"); /* MATRIX A[2x3] Col 1 Col 2 Col 3 Row 1: 1.0000 2.0000 3.0000 Row 2: 4.0000 5.0000 6.0000 */ printf ("A\n\n"); double A[test_m*test_k] = { 1, 4, 2, 5, 3, 6 }; printMatrix (A, test_m, test_k, test_col_major); /* MATRIX B[3x4] Col 1 Col 2 Col 3 Col 4 Row 1: 1.0000 2.0000 3.0000 4.0000 Row 2: 0.1000 0.2000 0.3000 0.4000 Row 3: 0.0100 0.0200 0.0300 0.0400 */ printf ("B\n\n"); double B[test_k*test_n] = { 1.0, 0.1, 0.01, 2.0, 0.2, 0.02, 3.0, 0.3, 0.03, 4.0, 0.4, 0.04 }; printMatrix (B, test_k, test_n, test_col_major); /* MATRIX C[2x4] Col 1 Col 2 Col 3 Col 4 Row 1: 1.2300 2.4600 3.6900 4.9200 Row 2: 4.5600 9.1200 13.6800 18.2400 */ double C[test_m*test_n] = {0}; printf ("C\n\n"); // http://www.math.utah.edu/software/lapack/lapack-blas/dgemm.html /* cblas_dgemm( 'N', // TRANSA: N - do not transpose A, T - transpose A 'N', // TRANSB: N - do not transpose A, T - transpose A test_m, // A matrix: m rows, k columns test_n, // B matrix: k rows, n columns test_k, // C matrix: m rows, n columns 1.0, // alpha = 1.0 A, // A source matrix test_m, // true lenght of a column in A // if TRANSA = 'N' then >= m // if TRANSA = 'T' then >= k B, // B source matrix test_k, // true lenght of a column in B // if TRANSB = 'N' then >= k // if TRANSB = 'T' then >= n 0.0, // beta = 0.0 C, // C destination matrix test_m); // true lenght of a row in C // must be >= m */ // C = alpha*A*B+beta*C cblas_dgemm( CblasColMajor, CblasNoTrans, // TRANSA: CblasNoTrans, // TRANSB: test_m, // A matrix: m rows, k columns test_n, // B matrix: k rows, n columns test_k, // C matrix: m rows, n columns 1.0, // alpha = 1.0 A, // A source matrix test_m, // true lenght of a column in A // if TRANSA = 'N' then >= m // if TRANSA = 'T' then >= k B, // B source matrix test_k, // true lenght of a column in B // if TRANSB = 'N' then >= k // if TRANSB = 'T' then >= n 0.0, // beta = 0.0 C, // C destination matrix test_m); // true lenght of a row in C // must be >= m printMatrix (C, test_m, test_n, test_col_major); } //! attempt the trickery static void testMulRowMajor_trickery() { printf ("==========================================================\n"); printf ("Test BLAS multiply - row major order with trickery\n"); printf ("==========================================================\n\n"); /* MATRIX A[2x3] Col 1 Col 2 Col 3 Row 1: 1.0000 2.0000 3.0000 Row 2: 4.0000 5.0000 6.0000 */ printf ("A\n\n"); double A[test_m*test_k] = { 1, 2, 3, 4, 5, 6 }; printMatrix (A, test_m, test_k, test_row_major); /* MATRIX B[3x4] Col 1 Col 2 Col 3 Col 4 Row 1: 1.0000 2.0000 3.0000 4.0000 Row 2: 0.1000 0.2000 0.3000 0.4000 Row 3: 0.0100 0.0200 0.0300 0.0400 */ printf ("B\n\n"); double B[test_k*test_n] = { 1.00, 2.00, 3.00, 4.00, 0.10, 0.20, 0.30, 0.40, 0.01, 0.02, 0.03, 0.04 }; printMatrix (B, test_k, test_n, test_row_major); /* MATRIX C[2x4] Col 1 Col 2 Col 3 Col 4 Row 1: 1.2300 2.4600 3.6900 4.9200 Row 2: 4.5600 9.1200 13.6800 18.2400 */ double C[test_m*test_n] = {0}; printf ("C\n\n"); // C = alpha*A*B+beta*C cblas_dgemm( CblasColMajor, CblasNoTrans, // TRANSA: CblasNoTrans, // TRANSB: test_n, // B matrix: k rows, n columns test_m, // A matrix: m rows, k columns test_k, // C matrix: m rows, n columns 1.0, // alpha = 1.0 B, // B source matrix test_n, // true lenght of a column in B // if TRANSB = 'N' then >= k // if TRANSB = 'T' then >= n A, // A source matrix test_k, // true lenght of a column in A // if TRANSA = 'N' then >= m // if TRANSA = 'T' then >= k 0.0, // beta = 0.0 C, // C destination matrix test_n); // true lenght of a row in C // must be >= m printMatrix (C, test_m, test_n, test_row_major); } //! build in version static void testMulRowMajor() { printf ("==========================================================\n"); printf ("Test BLAS multiply - row major order without trickery\n"); printf ("==========================================================\n\n"); /* MATRIX A[2x3] Col 1 Col 2 Col 3 Row 1: 1.0000 2.0000 3.0000 Row 2: 4.0000 5.0000 6.0000 */ printf ("A\n\n"); double A[test_m*test_k] = { 1, 2, 3, 4, 5, 6 }; printMatrix (A, test_m, test_k, test_row_major); /* MATRIX B[3x4] Col 1 Col 2 Col 3 Col 4 Row 1: 1.0000 2.0000 3.0000 4.0000 Row 2: 0.1000 0.2000 0.3000 0.4000 Row 3: 0.0100 0.0200 0.0300 0.0400 */ printf ("B\n\n"); double B[test_k*test_n] = { 1.00, 2.00, 3.00, 4.00, 0.10, 0.20, 0.30, 0.40, 0.01, 0.02, 0.03, 0.04 }; printMatrix (B, test_k, test_n, test_row_major); /* MATRIX C[2x4] Col 1 Col 2 Col 3 Col 4 Row 1: 1.2300 2.4600 3.6900 4.9200 Row 2: 4.5600 9.1200 13.6800 18.2400 */ double C[test_m*test_n] = {0}; printf ("C\n\n"); // C = alpha*A*B+beta*C cblas_dgemm( CblasRowMajor, CblasNoTrans, // TRANSA: CblasNoTrans, // TRANSB: test_m, // A matrix: m rows, k columns test_n, // B matrix: k rows, n columns test_k, // C matrix: m rows, n columns 1.0, // alpha = 1.0 A, // A source matrix test_k, // true lenght of a column in A // if TRANSA = 'N' then >= m // if TRANSA = 'T' then >= k B, // B source matrix test_n, // true lenght of a column in B // if TRANSB = 'N' then >= k // if TRANSB = 'T' then >= n 0.0, // beta = 0.0 C, // C destination matrix test_n); // true lenght of a row in C // must be >= m printMatrix (C, test_m, test_n, test_col_major); } #undef test_m #undef test_n #undef test_k #define test_m 500 #define test_k 600 #define test_n 700 #define test_spins 5 // --------------------------------------------------------------------- #define IMPLEMENTATION_ROW_MAJOR \ cblas_dgemm( \ CblasRowMajor, CblasNoTrans, CblasNoTrans, test_m, test_n, \ test_k, 1.0, A, test_k, B, test_n, 0.0, C, test_n); \ startTime = getRealTime (); \ for(i=0; i #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) #include /* POSIX flags */ #include /* clock_gettime(), time() */ #include /* gethrtime(), gettimeofday() */ #if defined(__MACH__) && defined(__APPLE__) #include #include #endif #else #error "Unable to define getRealTime( ) for an unknown OS." #endif /** * Returns the real time, in seconds, or -1.0 if an error occurred. * * Time is measured since an arbitrary and OS-dependent start time. * The returned real time is only useful for computing an elapsed time * between two calls to this function. */ double getRealTime( ) { #if defined(_WIN32) FILETIME tm; ULONGLONG t; #if defined(NTDDI_WIN8) && NTDDI_VERSION >= NTDDI_WIN8 /* Windows 8, Windows Server 2012 and later. ---------------- */ GetSystemTimePreciseAsFileTime( &tm ); #else /* Windows 2000 and later. ---------------------------------- */ GetSystemTimeAsFileTime( &tm ); #endif t = ((ULONGLONG)tm.dwHighDateTime << 32) | (ULONGLONG)tm.dwLowDateTime; return (double)t / 10000000.0; #elif (defined(__hpux) || defined(hpux)) || ((defined(__sun__) || defined(__sun) || defined(sun)) && (defined(__SVR4) || defined(__svr4__))) /* HP-UX, Solaris. ------------------------------------------ */ return (double)gethrtime( ) / 1000000000.0; #elif defined(__MACH__) && defined(__APPLE__) /* OSX. ----------------------------------------------------- */ static double timeConvert = 0.0; if ( timeConvert == 0.0 ) { mach_timebase_info_data_t timeBase; (void)mach_timebase_info( &timeBase ); timeConvert = (double)timeBase.numer / (double)timeBase.denom / 1000000000.0; } return (double)mach_absolute_time( ) * timeConvert; #elif defined(_POSIX_VERSION) /* POSIX. --------------------------------------------------- */ #if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) { struct timespec ts; #if defined(CLOCK_MONOTONIC_PRECISE) /* BSD. --------------------------------------------- */ const clockid_t id = CLOCK_MONOTONIC_PRECISE; #elif defined(CLOCK_MONOTONIC_RAW) /* Linux. ------------------------------------------- */ const clockid_t id = CLOCK_MONOTONIC_RAW; #elif defined(CLOCK_HIGHRES) /* Solaris. ----------------------------------------- */ const clockid_t id = CLOCK_HIGHRES; #elif defined(CLOCK_MONOTONIC) /* AIX, BSD, Linux, POSIX, Solaris. ----------------- */ const clockid_t id = CLOCK_MONOTONIC; #elif defined(CLOCK_REALTIME) /* AIX, BSD, HP-UX, Linux, POSIX. ------------------- */ const clockid_t id = CLOCK_REALTIME; #else const clockid_t id = (clockid_t)-1; /* Unknown. */ #endif /* CLOCK_* */ if ( id != (clockid_t)-1 && clock_gettime( id, &ts ) != -1 ) return (double)ts.tv_sec + (double)ts.tv_nsec / 1000000000.0; /* Fall thru. */ } #endif /* _POSIX_TIMERS */ /* AIX, BSD, Cygwin, HP-UX, Linux, OSX, POSIX, Solaris. ----- */ struct timeval tm; gettimeofday( &tm, NULL ); return (double)tm.tv_sec + (double)tm.tv_usec / 1000000.0; #else return -1.0; /* Failed. */ #endif }