This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| void div2_SSE(double x, double xx, double y, double yy, double* z, double* zz){ | |
| double c, cc, u, uu; | |
| __m128d x_xx = {x, xx}; | |
| __m128d y_yy = {y, yy}; | |
| __m128d c_cc = _mm_div_pd(x_xx, y_yy); // {x/y, xx/yy} | |
| _mm_storeh_pd(&c, c_cc); | |
| mul12_SSE(c, y, &u, &uu); | |
| __m128d u_uu = {u, uu}; | |
| __m128d tmp; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| void mul2_SSE(double x, double xx, double y, double yy, double* z, double* zz){ | |
| double c, cc; | |
| mul12_SSE( x, y, &c, &cc); | |
| __m128d x_y = {x, y}; | |
| __m128d yy_xx = {yy, xx}; | |
| __m128d xyy_yxx = _mm_mul_pd(x_y, yy_xx); | |
| __m128d cc_cc = {cc, cc}; | |
| cc_cc = _mm_add_pd(_mm_add_pd(xyy_yxx, _mm_shuffle_pd(xyy_yxx, xyy_yxx, 1)), cc_cc); | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| void mul12_SSE(double x, double y, double* z, double* zz){ | |
| const double factor = 134217729; | |
| __m128d factors = {factor, factor}; | |
| __m128d xy = {x, y}; | |
| __m128d hh, tt, pp, qq; | |
| pp = _mm_mul_pd(xy, factors); | |
| hh = _mm_sub_pd(xy, pp); | |
| hh = _mm_add_pd(hh, pp); | |
| tt = _mm_sub_pd(xy, hh); |