Created
February 5, 2022 19:23
-
-
Save ericlagergren/28f9178bff76fcc2a0c043f16656548d to your computer and use it in GitHub Desktop.
Revisions
-
ericlagergren created this gist
Feb 5, 2022. There are no files selected for viewing.
#include <immintrin.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Byte-wise "extract" across a register pair with a fixed offset of 8 bytes
 * (operand form `Vd, Vn, Vm, #imm`, as noted by the original author;
 * presumably modelled on the AArch64 EXT instruction).
 *
 * Returns a vector whose low 64-bit lane is the HIGH lane of `x` and whose
 * high 64-bit lane is the LOW lane of `y`.  Consequently vext8(v, v) swaps
 * the two lanes of `v`.
 *
 * Implemented as one PALIGNR instead of extracting both lanes to general
 * purpose registers and rebuilding the vector through MMX (__m64) casts.
 * The target attribute lets this compile without -mssse3 on GCC/Clang.
 */
__attribute__((target("ssse3")))
static inline __m128i vext8(__m128i x, __m128i y)
{
    /* (y:x) >> 64 bits, keeping the low 128 bits => { lo = x.hi, hi = y.lo } */
    return _mm_alignr_epi8(y, x, 8);
}

/*
 * Multiplies two 128-bit binary-field elements and reduces the 256-bit
 * product back to 128 bits.
 *
 * Both inputs are read as 16 little-endian bytes (bit 0 of byte 0 is the
 * x^0 coefficient).  The 256-bit carry-less product is formed with one
 * level of Karatsuba (three 64x64 multiplications) and then folded with
 * two multiplications by the constant 0xc200000000000000, i.e. a
 * Montgomery-style reduction.  The result equals acc * key * x^-128 modulo
 * x^128 + x^127 + x^126 + x^121 + 1, which is the `dot` operation of
 * POLYVAL (RFC 8452).
 *
 * acc, key: pointers to 16 readable bytes each; no alignment is required
 *           and neither buffer is modified.
 * Returns:  the reduced 128-bit product.
 *
 * Requires a CPU with PCLMULQDQ and SSSE3; the target attribute lets the
 * function compile without -mpclmul/-mssse3 on GCC/Clang.
 */
__attribute__((target("ssse3,pclmul")))
__m128i polymul(const uint8_t acc[16], const uint8_t key[16])
{
    const __m128i x = _mm_loadu_si128((const __m128i *)acc);
    const __m128i y = _mm_loadu_si128((const __m128i *)key);

    /*
     * Karatsuba, step 1: three 64x64 -> 128 carry-less products.
     *   m = (x.hi ^ x.lo) * (y.hi ^ y.lo)
     *   h = x.hi * y.hi
     *   l = x.lo * y.lo
     * Only the low lane of each folded operand feeds the multiply.
     */
    const __m128i x_fold = _mm_xor_si128(vext8(x, x), x);
    const __m128i y_fold = _mm_xor_si128(vext8(y, y), y);
    __m128i m = _mm_clmulepi64_si128(x_fold, y_fold, 0x00);
    const __m128i h = _mm_clmulepi64_si128(x, y, 0x11);
    const __m128i l = _mm_clmulepi64_si128(x, y, 0x00);

    /*
     * Karatsuba, step 2: recombine into the 256-bit product [x23 : x01].
     * After these three XORs:
     *   mid.lo = (m ^ h ^ l).lo ^ l.hi   -> bits  64..127 of the product
     *   mid.hi = (m ^ h ^ l).hi ^ h.lo   -> bits 128..191 of the product
     */
    m = _mm_xor_si128(m, vext8(l, h));
    const __m128i mid = _mm_xor_si128(m, _mm_xor_si128(h, l));

    const __m128i h_swapped = vext8(h, h);
    const __m128i l_swapped = vext8(l, l);
    const __m128i x23 = vext8(mid, h_swapped);  /* { mid.hi, h.hi }  */
    const __m128i x01 = vext8(l_swapped, mid);  /* { l.lo,  mid.lo } */

    /*
     * Reduction: fold the low 128 bits (x01) into the high 128 bits (x23)
     * with two multiplications by the reduction constant.
     * The unsigned -> signed conversion only reinterprets the bit pattern.
     */
    const __m128i poly = _mm_set1_epi64x((long long)0xc200000000000000ULL);
    const __m128i a = _mm_clmulepi64_si128(poly, x01, 0x00);
    const __m128i b = _mm_xor_si128(vext8(a, a), x01);
    const __m128i c = _mm_clmulepi64_si128(poly, b, 0x11);

    return _mm_xor_si128(_mm_xor_si128(c, b), x23);
}