Skip to content

Instantly share code, notes, and snippets.

/*
 * div2_SSE: opening part of a routine that divides the pair (x, xx) by the
 * pair (y, yy) using SSE2 packed-double operations. z and zz are output
 * pointers; they are not written in the portion visible here.
 * NOTE(review): presumably a double-double (head/tail) division -- confirm
 * against the full source; only the first statements are visible.
 */
void div2_SSE(double x, double xx, double y, double yy, double* z, double* zz){
double c, cc, u, uu;
// Pack each operand pair into one vector: first value in the low lane,
// second value in the high lane.
__m128d x_xx = {x, xx};
__m128d y_yy = {y, yy};
__m128d c_cc = _mm_div_pd(x_xx, y_yy); // {x/y, xx/yy}
// NOTE(review): _mm_storeh_pd stores the HIGH lane, i.e. xx/yy, into c.
// The next call uses c together with y, which suggests x/y (the low lane,
// e.g. via _mm_storel_pd) was intended -- verify before relying on this.
_mm_storeh_pd(&c, c_cc);
// mul12_SSE returns its two results through &u and &uu (presumably the
// head and tail of the product c*y -- confirm against mul12_SSE).
mul12_SSE(c, y, &u, &uu);
__m128d u_uu = {u, uu};
__m128d tmp;
/*
 * mul2_SSE: opening part of a routine that multiplies the pair (x, xx) by
 * the pair (y, yy) using SSE2 packed-double operations. z and zz are output
 * pointers; they are not written in the portion visible here.
 * NOTE(review): presumably a double-double (head/tail) product -- confirm
 * against the full source; only the first statements are visible.
 */
void mul2_SSE(double x, double xx, double y, double yy, double* z, double* zz){
double c, cc;
// mul12_SSE returns its two results for the inputs x and y through &c and
// &cc (presumably head and tail of x*y -- confirm against mul12_SSE).
mul12_SSE( x, y, &c, &cc);
// Arrange the operands so one packed multiply yields both cross terms:
// low lane = x*yy, high lane = y*xx.
__m128d x_y = {x, y};
__m128d yy_xx = {yy, xx};
__m128d xyy_yxx = _mm_mul_pd(x_y, yy_xx);
__m128d cc_cc = {cc, cc};
// The shuffle with mask 1 swaps the two lanes, so the inner add puts
// x*yy + y*xx in both lanes; the outer add then folds in cc, leaving
// x*yy + y*xx + cc in both lanes of cc_cc.
cc_cc = _mm_add_pd(_mm_add_pd(xyy_yxx, _mm_shuffle_pd(xyy_yxx, xyy_yxx, 1)), cc_cc);
void mul12_SSE(double x, double y, double* z, double* zz){
const double factor = 134217729;
__m128d factors = {factor, factor};
__m128d xy = {x, y};
__m128d hh, tt, pp, qq;
pp = _mm_mul_pd(xy, factors);
hh = _mm_sub_pd(xy, pp);
hh = _mm_add_pd(hh, pp);
tt = _mm_sub_pd(xy, hh);