Skip to content

Instantly share code, notes, and snippets.

@MORTAL2000
Forked from zeux/vfc5.cpp
Created January 29, 2020 11:29
Show Gist options
  • Save MORTAL2000/b67cdfe690eff00bbfa4c9b4afddca98 to your computer and use it in GitHub Desktop.
Save MORTAL2000/b67cdfe690eff00bbfa4c9b4afddca98 to your computer and use it in GitHub Desktop.

Revisions

  1. @zeux zeux created this gist Feb 12, 2016.
    156 changes: 156 additions & 0 deletions vfc5.cpp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,156 @@
    #include <stdbool.h>
    #include <spu_intrinsics.h>

    // shuffle helpers
    //
    // Word selectors for the control operand of si_shufb. Each constant packs the
    // four byte indices of one 32-bit word: Ln selects word n of the first (left)
    // source operand, Rn selects word n of the second (right) source operand.
    #define L0 0x00010203
    #define L1 0x04050607
    #define L2 0x08090a0b
    #define L3 0x0c0d0e0f

    #define R0 0x10111213
    #define R1 0x14151617
    #define R2 0x18191a1b
    #define R3 0x1c1d1e1f

    // Selector made of four 0x80 control bytes; going by its name it is meant to
    // produce a zero word (NOTE(review): confirm against the SPU shufb encoding).
    // It is not used by the code visible in this file.
    #define ZERO 0x80808080

    // Builds a vector whose words 0..3 are picked by the selectors x, y, z, w
    // (L0..L3 / R0..R3 / ZERO) from the operands l and r.
    #define SHUFFLE(l, r, x, y, z, w) si_shufb(l, r, ((qword)(vec_uint4){x, y, z, w}))

    // splat helper
    //
    // Replicates word idx of v into all four words of the result. idx must be a
    // literal 0..3 because it is token-pasted onto L to name the selector.
    // v is expanded twice, so do not pass an expression with side effects.
    #define SPLAT(v, idx) si_shufb(v, v, (qword)(vec_uint4)(L ## idx))

    // 4x4 float matrix stored as four 4-component vectors.
    //
    // transform_points_4 applies it as
    //     out[c] = x * row0[c] + y * row1[c] + z * row2[c] + row3[c]
    // i.e. row0..row2 scale the input x/y/z and row3 is the additive term.
    // The field names row0..row3 are generated by token pasting in
    // transform_matrix, so they must keep this exact spelling.
    struct matrix_t
    {
    vec_float4 row0;
    vec_float4 row1;
    vec_float4 row2;
    vec_float4 row3;
    };

    // Axis-aligned bounding box described by two corner vectors.
    // is_visible reads only words 0..2 (x, y, z) of each vector; word 3 is ignored.
    struct aabb_t
    {
    vec_float4 min; // corner supplying the "min" x, y, z of the box
    vec_float4 max; // corner supplying the "max" x, y, z of the box
    };

    // Transforms four points at once by *mat.
    //
    // The points are passed in SoA form: x, y and z each hold one coordinate of
    // all four points. For every output component c in 0..3 the function stores
    //     dest[c] = x * mat->row0[c] + y * mat->row1[c] + z * mat->row2[c] + mat->row3[c]
    // so dest[0], dest[1], dest[2], dest[3] receive the transformed x, y, z and w
    // of the four points, again one coordinate per qword.
    //
    // dest must point to at least four qwords.
    static inline void transform_points_4(qword* dest, qword x, qword y, qword z, const struct matrix_t* mat)
    {
        // The multiply-adds nest innermost-first (row3 term, then z, y, x) so the
        // accumulation order is fixed.
    #define TRANSFORM_COMPONENT(comp) \
        dest[comp] = si_fma(x, SPLAT((qword)mat->row0, comp), \
                     si_fma(y, SPLAT((qword)mat->row1, comp), \
                     si_fma(z, SPLAT((qword)mat->row2, comp), \
                               SPLAT((qword)mat->row3, comp))))

        TRANSFORM_COMPONENT(0);
        TRANSFORM_COMPONENT(1);
        TRANSFORM_COMPONENT(2);
        TRANSFORM_COMPONENT(3);

    #undef TRANSFORM_COMPONENT
    }

    static inline void transform_matrix(struct matrix_t* dest, const struct matrix_t* lhs, const struct matrix_t* rhs)
    {
    #define COMP_0(c) \
    qword res_ ## c = si_fm((qword)lhs->row2, SPLAT((qword)rhs->row ## c, 2)); \
    res_ ## c = si_fma((qword)lhs->row1, SPLAT((qword)rhs->row ## c, 1), res_ ## c); \
    res_ ## c = si_fma((qword)lhs->row0, SPLAT((qword)rhs->row ## c, 0), res_ ## c); \
    dest->row ## c = (vec_float4)res_ ## c;

    #define COMP_1(c) \
    qword res_ ## c = si_fma((qword)lhs->row2, SPLAT((qword)rhs->row ## c, 2), (qword)lhs->row3); \
    res_ ## c = si_fma((qword)lhs->row1, SPLAT((qword)rhs->row ## c, 1), res_ ## c); \
    res_ ## c = si_fma((qword)lhs->row0, SPLAT((qword)rhs->row ## c, 0), res_ ## c); \
    dest->row ## c = (vec_float4)res_ ## c;

    COMP_0(0);
    COMP_0(1);
    COMP_0(2);
    COMP_1(3);

    #undef COMP_0
    #undef COMP_1
    }

    __attribute__((noinline)) unsigned int is_visible(const struct matrix_t* transform, const struct aabb_t* aabb, const struct matrix_t* frustum)
    {
    qword min = (qword)aabb->min;
    qword max = (qword)aabb->max;

    // get aabb points (SoA)
    qword minmax_x = SHUFFLE(min, max, L0, R0, L0, R0); // x X x X
    qword minmax_y = SHUFFLE(min, max, L1, L1, R1, R1); // y y Y Y
    qword minmax_z_0 = SPLAT(min, 2); // z z z z
    qword minmax_z_1 = SPLAT(max, 2); // Z Z Z Z

    // get clipping matrix
    struct matrix_t clip;

    transform_matrix(&clip, frustum, transform);

    // transform points to clip space
    qword points_cs_0[4];
    qword points_cs_1[4];

    transform_points_4(points_cs_0, minmax_x, minmax_y, minmax_z_0, &clip);
    transform_points_4(points_cs_1, minmax_x, minmax_y, minmax_z_1, &clip);

    // calculate -w
    qword points_cs_0_negw = si_xor(points_cs_0[3], (qword)(vec_uint4)(0x80000000));
    qword points_cs_1_negw = si_xor(points_cs_1[3], (qword)(vec_uint4)(0x80000000));

    // for each plane...
    #define NOUT(a, b, c, d) si_orx(si_or(si_fcgt(a, b), si_fcgt(c, d)))

    qword nout0 = NOUT(points_cs_0[0], points_cs_0_negw, points_cs_1[0], points_cs_1_negw);
    qword nout1 = NOUT(points_cs_0[3], points_cs_0[0], points_cs_1[3], points_cs_1[0]);
    qword nout2 = NOUT(points_cs_0[1], points_cs_0_negw, points_cs_1[1], points_cs_1_negw);
    qword nout3 = NOUT(points_cs_0[3], points_cs_0[1], points_cs_1[3], points_cs_1[1]);
    qword nout4 = NOUT(points_cs_0[2], (qword)(0), points_cs_1[2], (qword)(0));
    qword nout5 = NOUT(points_cs_0[3], points_cs_0[2], points_cs_1[3], points_cs_1[2]);

    #undef NOUT

    // merge "not outside" flags
    qword nout01 = si_and(nout0, nout1);
    qword nout012 = si_and(nout01, nout2);

    qword nout34 = si_and(nout3, nout4);
    qword nout345 = si_and(nout34, nout5);

    qword nout = si_and(nout012, nout345);

    return si_to_uint(nout);
    }

    // simple ortho frustum: scales x, y and z by 0.1 and leaves w at 1
    struct matrix_t frustum =
    {
        { 0.1f, 0.0f, 0.0f, 0.0f },
        { 0.0f, 0.1f, 0.0f, 0.0f },
        { 0.0f, 0.0f, 0.1f, 0.0f },
        { 0.0f, 0.0f, 0.0f, 1.0f }
    };

    // small box spanning [-1, 1] x [-2, 2] x [-3, 3]; the fourth component of
    // each corner is zero
    struct aabb_t aabb =
    {
        { -1.0f, -2.0f, -3.0f, 0.0f },
        {  1.0f,  2.0f,  3.0f, 0.0f }
    };

    // and some weird matrix; the fourth component of every row is zero
    // (transform_matrix never reads it from its rhs argument)
    struct matrix_t transform =
    {
        { 0.123f, 0.456f, 0.789f, 0.0f },
        { 0.456f, 0.123f, 0.789f, 0.0f },
        { 0.789f, 0.123f, 0.456f, 0.0f },
        { 1.0f, -1.0f, 1.0f, 0.0f }
    };

    // Driver: runs the visibility test once on the file-scope sample data
    // (transform, aabb, frustum) and then issues si_stop(0).
    int main(void)
    {
        // The result is deliberately discarded; the call exists to exercise is_visible.
        (void)is_visible(&transform, &aabb, &frustum);
        si_stop(0);
        return 0;
    }