@@ -0,0 +1,156 @@
#include < stdbool.h>
#include < spu_intrinsics.h>
// shuffle helpers
//
// Each constant below is one 32-bit word of an si_shufb control vector, i.e.
// four byte selectors. Selectors 0x00-0x0f pick bytes from the first ("left")
// operand and 0x10-0x1f pick bytes from the second ("right") operand, so
// Ln / Rn copy word n of the left / right operand into the destination word.
#define L0 0x00010203
#define L1 0x04050607
#define L2 0x08090a0b
#define L3 0x0c0d0e0f
#define R0 0x10111213
#define R1 0x14151617
#define R2 0x18191a1b
#define R3 0x1c1d1e1f
// Selector bytes of 0x80 make si_shufb emit zero bytes for that word.
#define ZERO 0x80808080

// Builds a vector whose four words are chosen by x, y, z, w (each one of the
// Ln / Rn / ZERO constants above) from the vectors l and r.
// NOTE: there must be no whitespace between the macro name and '(' in the
// definition, otherwise the preprocessor treats it as an object-like macro.
#define SHUFFLE(l, r, x, y, z, w) si_shufb((l), (r), ((qword)(vec_uint4){(x), (y), (z), (w)}))

// splat helper
// Replicates word idx (a literal 0..3, pasted onto "L") of v into all four words.
#define SPLAT(v, idx) si_shufb((v), (v), (qword)(vec_uint4)(L ## idx))
// 4x4 float matrix stored as four 4-float rows.
// transform_points_4 computes x * row0 + y * row1 + z * row2 + row3, so for
// points row3 is the term that is added without scaling.
struct matrix_t
{
    vec_float4 row0; // scaled by the x coordinate
    vec_float4 row1; // scaled by the y coordinate
    vec_float4 row2; // scaled by the z coordinate
    vec_float4 row3; // added as-is
};
// Axis-aligned bounding box given by two opposite corners.
// is_visible reads only lanes 0..2 (x, y, z) of each corner.
struct aabb_t
{
    vec_float4 min; // corner with the smaller coordinates
    vec_float4 max; // corner with the larger coordinates
};
// Transforms four points, supplied in structure-of-arrays form, by *mat.
//
//   dest - receives four vectors; dest[c] holds output component c of all
//          four points
//   x, y, z - the x / y / z coordinates of the four points, one point per lane
//   mat  - matrix to apply; row3 is added unscaled (the points' w is taken as 1)
//
// For every component c: dest[c] = x * row0[c] + y * row1[c] + z * row2[c] + row3[c]
static inline void transform_points_4(qword *dest, qword x, qword y, qword z, const struct matrix_t *mat)
{
    // Read the matrix once, before anything is written through dest.
    const qword row0 = (qword)mat->row0;
    const qword row1 = (qword)mat->row1;
    const qword row2 = (qword)mat->row2;
    const qword row3 = (qword)mat->row3;

    // Accumulation order (row3, then z, y, x) matches the fused multiply-add
    // chain this routine has always used, so results are bit-identical.
#define TRANSFORM_COMPONENT(c) \
    si_fma(x, SPLAT(row0, c), \
        si_fma(y, SPLAT(row1, c), \
            si_fma(z, SPLAT(row2, c), SPLAT(row3, c))))

    const qword out0 = TRANSFORM_COMPONENT(0);
    const qword out1 = TRANSFORM_COMPONENT(1);
    const qword out2 = TRANSFORM_COMPONENT(2);
    const qword out3 = TRANSFORM_COMPONENT(3);

#undef TRANSFORM_COMPONENT

    dest[0] = out0;
    dest[1] = out1;
    dest[2] = out2;
    dest[3] = out3;
}
static inline void transform_matrix (struct matrix_t * dest, const struct matrix_t * lhs, const struct matrix_t * rhs)
{
#define COMP_0 (c ) \
qword res_ ## c = si_fm ((qword)lhs->row2 , SPLAT ((qword)rhs->row ## c, 2 )); \
res_ ## c = si_fma ((qword)lhs->row1 , SPLAT ((qword)rhs->row ## c, 1 ), res_ ## c); \
res_ ## c = si_fma ((qword)lhs->row0 , SPLAT ((qword)rhs->row ## c, 0 ), res_ ## c); \
dest->row ## c = (vec_float4)res_ ## c;
#define COMP_1 (c ) \
qword res_ ## c = si_fma ((qword)lhs->row2 , SPLAT ((qword)rhs->row ## c, 2 ), (qword)lhs->row3 ); \
res_ ## c = si_fma ((qword)lhs->row1 , SPLAT ((qword)rhs->row ## c, 1 ), res_ ## c); \
res_ ## c = si_fma ((qword)lhs->row0 , SPLAT ((qword)rhs->row ## c, 0 ), res_ ## c); \
dest->row ## c = (vec_float4)res_ ## c;
COMP_0 (0 );
COMP_0 (1 );
COMP_0 (2 );
COMP_1 (3 );
#undef COMP_0
#undef COMP_1
}
__attribute__ ((noinline)) unsigned int is_visible(const struct matrix_t * transform, const struct aabb_t * aabb, const struct matrix_t * frustum)
{
qword min = (qword)aabb->min ;
qword max = (qword)aabb->max ;
// get aabb points (SoA)
qword minmax_x = SHUFFLE (min, max, L0, R0, L0, R0); // x X x X
qword minmax_y = SHUFFLE (min, max, L1, L1, R1, R1); // y y Y Y
qword minmax_z_0 = SPLAT (min, 2 ); // z z z z
qword minmax_z_1 = SPLAT (max, 2 ); // Z Z Z Z
// get clipping matrix
struct matrix_t clip;
transform_matrix (&clip, frustum, transform);
// transform points to clip space
qword points_cs_0[4 ];
qword points_cs_1[4 ];
transform_points_4 (points_cs_0, minmax_x, minmax_y, minmax_z_0, &clip);
transform_points_4 (points_cs_1, minmax_x, minmax_y, minmax_z_1, &clip);
// calculate -w
qword points_cs_0_negw = si_xor (points_cs_0[3 ], (qword)(vec_uint4)(0x80000000 ));
qword points_cs_1_negw = si_xor (points_cs_1[3 ], (qword)(vec_uint4)(0x80000000 ));
// for each plane...
#define NOUT (a, b, c, d ) si_orx(si_or(si_fcgt(a, b), si_fcgt(c, d)))
qword nout0 = NOUT (points_cs_0[0 ], points_cs_0_negw, points_cs_1[0 ], points_cs_1_negw);
qword nout1 = NOUT (points_cs_0[3 ], points_cs_0[0 ], points_cs_1[3 ], points_cs_1[0 ]);
qword nout2 = NOUT (points_cs_0[1 ], points_cs_0_negw, points_cs_1[1 ], points_cs_1_negw);
qword nout3 = NOUT (points_cs_0[3 ], points_cs_0[1 ], points_cs_1[3 ], points_cs_1[1 ]);
qword nout4 = NOUT (points_cs_0[2 ], (qword)(0 ), points_cs_1[2 ], (qword)(0 ));
qword nout5 = NOUT (points_cs_0[3 ], points_cs_0[2 ], points_cs_1[3 ], points_cs_1[2 ]);
#undef NOUT
// merge "not outside" flags
qword nout01 = si_and (nout0, nout1);
qword nout012 = si_and (nout01, nout2);
qword nout34 = si_and (nout3, nout4);
qword nout345 = si_and (nout34, nout5);
qword nout = si_and (nout012, nout345);
return si_to_uint (nout);
}
// simple ortho frustum
// Scales x, y and z by 0.1 and leaves the unscaled row at (0, 0, 0, 1).
struct matrix_t frustum =
{
    { 0.1f, 0, 0, 0 },
    { 0, 0.1f, 0, 0 },
    { 0, 0, 0.1f, 0 },
    { 0, 0, 0, 1 }
};
// small box
struct aabb_t aabb =
{
{ -1 , -2 , -3 },
{ 1 , 2 , 3 }
};
// and some weird matrix
// Only three lanes per row are given; the fourth lane is zero-initialised.
// transform_matrix never reads lane 3 of its rhs rows, which is how
// is_visible passes this matrix.
struct matrix_t transform =
{
    { 0.123f, 0.456f, 0.789f },
    { 0.456f, 0.123f, 0.789f },
    { 0.789f, 0.123f, 0.456f },
    { 1.f, -1.f, 1.f }
};
// Runs the visibility test once on the sample data above and then stops.
int main(void)
{
    // The result is intentionally discarded.
    (void)is_visible(&transform, &aabb, &frustum);

    // Stop-and-signal with code 0.
    si_stop(0);

    return 0;
}