Skip to content

Instantly share code, notes, and snippets.

@ShaderManager
Last active July 22, 2020 12:14
Show Gist options
  • Save ShaderManager/f68e7bd0c55017c6d2ab to your computer and use it in GitHub Desktop.
Save ShaderManager/f68e7bd0c55017c6d2ab to your computer and use it in GitHub Desktop.
// Bitwise blend of two float vectors: every result bit is taken from `a`
// where the corresponding bit of `mask` is set, and from `b` where it is clear.
// With an all-ones / all-zeros lane mask (e.g. the result of _mm_cmplt_ps)
// this selects whole lanes: mask ? a : b.
static inline __m128 vselect(const __m128 a, const __m128 b, const __m128 mask)
{
    const __m128 picked_from_a = _mm_and_ps(mask, a);
    const __m128 picked_from_b = _mm_andnot_ps(mask, b); // (~mask) & b
    return _mm_or_ps(picked_from_a, picked_from_b);
}
// Bitwise blend of two integer vectors: every result bit is taken from `a`
// where the corresponding bit of `mask` is set, and from `b` where it is clear.
static inline __m128i vselect(const __m128i a, const __m128i b, const __m128i mask)
{
    const __m128i picked_from_a = _mm_and_si128(mask, a);
    const __m128i picked_from_b = _mm_andnot_si128(mask, b); // (~mask) & b
    return _mm_or_si128(picked_from_a, picked_from_b);
}
/*
Sort 4 floats in an SSE vector in ascending order using a sorting network
(5 compare-exchanges in 3 passes) and return the indices of the moved values.

The lane index (0..3) of every element is written into the two lowest mantissa
bits before sorting, so it travels with its value through the network.
Consequences for the caller:
  - the two lowest mantissa bits of the input are discarded; values that are
    otherwise bit-identical are ordered by their lane index;
  - on return every element of `v` still carries its original lane index in
    those two bits (they are extracted for the return value, not cleared);
  - lane k of the returned vector is the original lane index (0..3) of the
    element that ended up in lane k of `v`.
*/
inline __m128i v4_sort(__m128& v)
{
    // @todo Replace by constants?
    const auto lane_ids = _mm_castsi128_ps(_mm_set_epi32(3, 2, 1, 0));
    // All bits except the two lowest mantissa bits (0xFFFFFFFC).
    const auto mask = _mm_castsi128_ps(_mm_set1_epi32(~0x3));

    // Place indices in lower 2 bits of mantissa
    v = _mm_or_ps(_mm_and_ps(v, mask), lane_ids);

    // Simple sorting network for n=4.
    // In every pass each lane is compared with its partner lane, and the
    // comparison result of the LOWER lane of a pair is broadcast to both lanes
    // of that pair. vselect(own, partner, cmp) then keeps the own value when
    // "low < high" and swaps otherwise, so the low lane receives the minimum
    // and the high lane the maximum. Sharing one decision per pair is what
    // guarantees the pass is a permutation (no value duplicated or lost).

    // First pass: compare-exchange lanes (0,2) and (1,3)
    auto partner = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 0, 3, 2));
    auto cmp = _mm_cmplt_ps(v, partner);
    cmp = _mm_shuffle_ps(cmp, cmp, _MM_SHUFFLE(1, 0, 1, 0)); // [c0, c1, c0, c1]
    auto sorted = vselect(v, partner, cmp);

    // Second pass: compare-exchange lanes (0,1) and (2,3)
    partner = _mm_shuffle_ps(sorted, sorted, _MM_SHUFFLE(2, 3, 0, 1));
    cmp = _mm_cmplt_ps(sorted, partner);
    // Must be [c0, c0, c2, c2]: lanes 0/1 share c0 and lanes 2/3 share c2.
    // (The previous _MM_SHUFFLE(2, 0, 2, 0) produced [c0, c2, c0, c2], mixing
    // the decisions of the two pairs and duplicating values when c0 != c2.)
    cmp = _mm_shuffle_ps(cmp, cmp, _MM_SHUFFLE(2, 2, 0, 0));
    sorted = vselect(sorted, partner, cmp);

    // Third pass: compare-exchange lanes (1,2); lanes 0 and 3 are paired with
    // themselves, so whichever operand vselect picks they stay unchanged.
    partner = _mm_shuffle_ps(sorted, sorted, _MM_SHUFFLE(3, 1, 2, 0));
    cmp = _mm_cmplt_ps(sorted, partner);
    cmp = _mm_shuffle_ps(cmp, cmp, _MM_SHUFFLE(3, 1, 1, 0)); // [c0, c1, c1, c3]
    v = vselect(sorted, partner, cmp);

    // Extract the indices (two lowest bits of every lane) and return them
    return _mm_castps_si128(_mm_andnot_ps(mask, v));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment