Skip to content

Instantly share code, notes, and snippets.

@ericlagergren
Created February 5, 2022 19:23
Show Gist options
  • Select an option

  • Save ericlagergren/28f9178bff76fcc2a0c043f16656548d to your computer and use it in GitHub Desktop.

Select an option

Save ericlagergren/28f9178bff76fcc2a0c043f16656548d to your computer and use it in GitHub Desktop.

Revisions

  1. ericlagergren created this gist Feb 5, 2022.
    47 changes: 47 additions & 0 deletions poly.c
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,47 @@
    #include <immintrin.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    // vext8(x, y) is the x86 counterpart of ARM's `EXT Vd, Vn, Vm, #8`:
    // it returns the 16 bytes that start 8 bytes into the concatenation x, y.
    //
    //   result.lo = x.hi
    //   result.hi = y.lo
    //
    // vext8(v, v) therefore swaps the two 64-bit halves of v.
    //
    // Implemented with SSE2 shuffles only, so both lanes stay in vector
    // registers and no MMX (__m64) values are involved.
    static inline __m128i vext8(__m128i x, __m128i y) {
        // Shift x.hi down into the low lane, then pair it with y.lo.
        return _mm_unpacklo_epi64(_mm_srli_si128(x, 8), y);
    }

    #if defined(__GNUC__) || defined(__clang__)
    // Enable PCLMULQDQ for polymul itself so the file also builds when the
    // translation unit is not compiled with -mpclmul.
    #define POLYMUL_TARGET __attribute__((target("pclmul")))
    #else
    #define POLYMUL_TARGET
    #endif

    // polymul multiplies two 128-bit polynomials over GF(2) and reduces the
    // 256-bit product back to 128 bits.
    //
    // acc and key each point to 16 readable bytes, loaded as little-endian
    // 128-bit values (byte 0 holds the lowest coefficients). No alignment is
    // required and neither buffer is written.
    //
    // The reduction folds with the constant 0xc200000000000000, which is the
    // Montgomery-style reduction used for POLYVAL (RFC 8452). The value
    // returned is acc * key * x^-128 modulo x^128 + x^127 + x^126 + x^121 + 1;
    // for example polymul(1, 1) yields x^127 + x^124 + x^121 + x^114 + 1.
    //
    // The CPU executing this function must support PCLMULQDQ.
    POLYMUL_TARGET
    __m128i polymul(const uint8_t acc[16], const uint8_t key[16]) {
        const __m128i x = _mm_loadu_si128((const __m128i *)acc);
        const __m128i y = _mm_loadu_si128((const __m128i *)key);

        // Karatsuba 1: three 64x64 carry-less products instead of four.
        //   l = x.lo * y.lo
        //   h = x.hi * y.hi
        //   m = (x.lo ^ x.hi) * (y.lo ^ y.hi)
        // Only the low lane of xsum/ysum is consumed by the 0x00 multiply.
        const __m128i xsum = _mm_xor_si128(vext8(x, x), x);
        const __m128i ysum = _mm_xor_si128(vext8(y, y), y);
        __m128i m = _mm_clmulepi64_si128(xsum, ysum, 0x00);
        __m128i h = _mm_clmulepi64_si128(x, y, 0x11);
        __m128i l = _mm_clmulepi64_si128(x, y, 0x00);

        // Karatsuba 2: assemble the 256-bit product P3:P2:P1:P0.
        //   P0 = l.lo
        //   P1 = l.hi ^ (m ^ h ^ l).lo
        //   P2 = h.lo ^ (m ^ h ^ l).hi
        //   P3 = h.hi
        __m128i mid = vext8(l, h);                     // [h.lo : l.hi]
        m = _mm_xor_si128(m, mid);
        mid = _mm_xor_si128(m, _mm_xor_si128(h, l));   // [P2 : P1]
        h = vext8(h, h);                               // [h.lo : h.hi]
        l = vext8(l, l);                               // [l.lo : l.hi]
        const __m128i x23 = vext8(mid, h);             // [P3 : P2]
        const __m128i x01 = vext8(l, mid);             // [P1 : P0]

        // Reduce: fold the low 128 bits (x01) into the high 128 bits (x23),
        // 64 bits per step, by multiplying with the reduction constant.
        // The cast only reinterprets the bit pattern as a signed 64-bit lane.
        const __m128i poly = _mm_set1_epi64x((long long)0xc200000000000000ULL);
        const __m128i a = _mm_clmulepi64_si128(poly, x01, 0x00);   // poly * P0
        const __m128i b = _mm_xor_si128(vext8(a, a), x01);
        const __m128i c = _mm_clmulepi64_si128(poly, b, 0x11);     // poly * b.hi
        return _mm_xor_si128(_mm_xor_si128(c, b), x23);
    }