Skip to content

Instantly share code, notes, and snippets.

@tannergooding
Last active September 2, 2022 09:14
Show Gist options
  • Select an option

  • Save tannergooding/103be726d48dc3d0b09e890bad0b892f to your computer and use it in GitHub Desktop.

Select an option

Save tannergooding/103be726d48dc3d0b09e890bad0b892f to your computer and use it in GitHub Desktop.

Revisions

  1. tannergooding renamed this gist Jul 28, 2020. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. tannergooding revised this gist Jul 28, 2020. 2 changed files with 104 additions and 82 deletions.
    180 changes: 98 additions & 82 deletions MathF.Sin.cs
    Original file line number Diff line number Diff line change
    @@ -59,128 +59,144 @@

    public static float Sin(float x)
    {
    return SoftwareFallback(x);
    double result = x;

    static float SoftwareFallback(float x)
    if (float.IsFinite(x))
    {
    double result = x;
    double ax = Math.Abs(x);

    if (float.IsFinite(x))
    if (ax <= PiOverFour)
    {
    double ax = Math.Abs(x);

    if (ax <= PiOverFour)
    if (ax >= TwoPowNegSeven)
    {
    if (ax >= TwoPowNegSeven)
    {
    result = SinTaylorSeriesFourIterations(x);
    }
    else if (ax >= TwoPowNegThirteen)
    {
    result = SinTaylorSeriesOneIteration(x);
    }
    else
    {
    result = x;
    }
    result = SinTaylorSeriesFourIterations(x);
    }
    else if (ax >= TwoPowNegThirteen)
    {
    result = SinTaylorSeriesOneIteration(x);
    }
    else
    {
    if (float.IsNegative(x))
    {
    x = -x;
    }
    result = x;
    }
    }
    else
    {
    int wasNegative = 0;

    if (x < 16000000.0)
    {
    // Reduce x to be in the range of -(PI / 4) to (PI / 4), inclusive
    if (float.IsNegative(x))
    {
    x = -x;
    wasNegative = 1;
    }

    // This is done by subtracting multiples of (PI / 2). Double-precision
    // isn't quite accurate enough and introduces some error, but we account
    // for that using a tail value that helps account for this.
    int region;

    long axExp = BitConverter.DoubleToInt64Bits(ax) >> 52;
    if (x < 16000000.0)
    {
    // Reduce x to be in the range of -(PI / 4) to (PI / 4), inclusive

    int region = (int)(x * TwoOverPi + 0.5);
    double piOverTwoCount = region;
    // This is done by subtracting multiples of (PI / 2). Double-precision
    // isn't quite accurate enough and introduces some error, but we account
    // for that using a tail value that helps account for this.

    double rHead = x - (piOverTwoCount * PiOverTwoPartOne);
    double rTail = (piOverTwoCount * PiOverTwoPartOneTail);
    long axExp = BitConverter.DoubleToInt64Bits(ax) >> 52;

    double r = rHead - rTail;
    long rExp = (BitConverter.DoubleToInt64Bits(r) << 1) >> 53;
    region = (int)(x * TwoOverPi + 0.5);
    double piOverTwoCount = region;

    if ((axExp - rExp) > 15)
    {
    // The remainder is pretty small compared with x, which implies that x is
    // near a multiple of (PI / 2). That is, x matches the multiple to at least
    // 15 bits and so we perform an additional fixup to account for any error
    double rHead = x - (piOverTwoCount * PiOverTwoPartOne);
    double rTail = (piOverTwoCount * PiOverTwoPartOneTail);

    double r = rHead - rTail;
    long rExp = (BitConverter.DoubleToInt64Bits(r) << 1) >> 53;

    r = rHead;
    if ((axExp - rExp) > 15)
    {
    // The remainder is pretty small compared with x, which implies that x is
    // near a multiple of (PI / 2). That is, x matches the multiple to at least
    // 15 bits and so we perform an additional fixup to account for any error

    rTail = (piOverTwoCount * PiOverTwoPartTwo);
    rHead = r - rTail;
    rTail = (piOverTwoCount * PiOverTwoPartTwoTail) - ((r - rHead) - rTail);
    r = rHead;

    r = rHead - rTail;
    }
    rTail = (piOverTwoCount * PiOverTwoPartTwo);
    rHead = r - rTail;
    rTail = (piOverTwoCount * PiOverTwoPartTwoTail) - ((r - rHead) - rTail);

    r = rHead - rTail;
    }

    if (rExp >= 0x3F2) // r >= 2^-13
    if (rExp >= 0x3F2) // r >= 2^-13
    {
    if ((region & 1) == 0) // region 0 or 2
    {
    if ((region & 1) == 0) // region 0 or 2
    {
    result = SinTaylorSeriesFourIterations(r);
    }
    else // region 1 or 3
    {
    result = CosTaylorSeriesFourIterations(r);
    }
    result = SinTaylorSeriesFourIterations(r);
    }
    else if (rExp > 0x3DE) // r > 1.1641532182693481E-10
    else // region 1 or 3
    {
    result = CosTaylorSeriesFourIterations(r);
    }
    }
    else if (rExp > 0x3DE) // r > 1.1641532182693481E-10
    {
    if ((region & 1) == 0) // region 0 or 2
    {
    if ((region & 1) == 0) // region 0 or 2
    {
    result = SinTaylorSeriesOneIteration(r);
    }
    else // region 1 or 3
    {
    result = CosTaylorSeriesOneIteration(r);

    }
    result = SinTaylorSeriesOneIteration(r);
    }
    else
    else // region 1 or 3
    {
    if ((region & 1) == 0) // region 0 or 2
    {
    result = r;
    }
    else // region 1 or 3
    {
    result = 1;
    }
    result = CosTaylorSeriesOneIteration(r);

    }
    }
    else
    {
    double r = ReduceForLargeInput(x, out int region);

    if ((region & 1) == 0) // region 0 or 2
    {
    result = SinTaylorSeriesFourIterations(r);
    result = r;
    }
    else // region 1 or 3
    {
    result = CosTaylorSeriesFourIterations(r);
    result = 1;
    }
    }
    }
    else
    {
    double r = ReduceForLargeInput(x, out region);

    if ((region & 1) == 0) // region 0 or 2
    {
    result = SinTaylorSeriesFourIterations(r);
    }
    else // region 1 or 3
    {
    result = CosTaylorSeriesFourIterations(r);
    }
    }

    // Lsinf_sse2_adjust_region
    region >>= 1;

    int tmp1 = region & wasNegative;

    region = ~region;
    wasNegative = ~wasNegative;

    int tmp2 = region & wasNegative;

    if (((tmp1 | tmp2) & 1) == 0)
    {
    // If the original region was 0/1 and arg is negative, then we negate the result.
    // -or-
    // If the original region was 2/3 and arg is positive, then we negate the result.

    result = -result;
    }
    }

    return (float)result;
    }

    return (float)result;

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static double CosTaylorSeriesOneIteration(double x1)
    {
    6 changes: 6 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,6 @@
    | Test Env | Method | Mean | Error | StdDev | Median | Min | Max | Gen 0 | Gen 1 | Gen 2 | Allocated |
    |-------------------- |------- |---------:|---------:|---------:|---------:|---------:|---------:|------:|------:|------:|----------:|
    | x64 Windows Native | Sin | 22.82 us | 0.015 us | 0.014 us | 22.82 us | 22.80 us | 22.85 us | - | - | - | - |
    | x86 Windows Native | Sin | 60.48 us | 0.087 us | 0.068 us | 60.48 us | 60.38 us | 60.60 us | - | - | - | - |
    | x64 Windows Managed | Sin | 21.26 us | 0.305 us | 0.285 us | 21.27 us | 20.85 us | 21.65 us | - | - | - | - |
    | x86 Windows Managed | Sin | 38.10 us | 0.390 us | 0.346 us | 38.06 us | 37.66 us | 38.71 us | - | - | - | - |
  3. tannergooding created this gist Jul 28, 2020.
    358 changes: 358 additions & 0 deletions MathF.Sin.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,358 @@
    // This is a port of the amd/win-libm implementation provided in assembly here: https://github.com/amd/win-libm/blob/master/sinf.asm
    // The original source is Copyright (c) 2002-2019 Advanced Micro Devices, Inc. and provided under the MIT License.
    //
    // Permission is hereby granted, free of charge, to any person obtaining a copy
    // of this Software and associated documentaon files (the "Software"), to deal
    // in the Software without restriction, including without limitation the rights
    // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    // copies of the Software, and to permit persons to whom the Software is
    // furnished to do so, subject to the following conditions:
    //
    // The above copyright notice and this permission notice shall be included in
    // all copies or substantial portions of the Software.
    //
    // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    // THE SOFTWARE.

    const double PiOverTwo = 1.5707963267948966;
    const double PiOverTwoPartOne = 1.5707963267341256;
    const double PiOverTwoPartOneTail = 6.077100506506192E-11;
    const double PiOverTwoPartTwo = 6.077100506303966E-11;
    const double PiOverTwoPartTwoTail = 2.0222662487959506E-21;
    const double PiOverFour = 0.7853981633974483;
    const double TwoOverPi = 0.6366197723675814;
    const double TwoPowNegSeven = 0.0078125;
    const double TwoPowNegThirteen = 0.0001220703125;

    const double C0 = -1.0 / 2.0; // 1 / 2!
    const double C1 = +1.0 / 24.0; // 1 / 4!
    const double C2 = -1.0 / 720.0; // 1 / 6!
    const double C3 = +1.0 / 40320.0; // 1 / 8!
    const double C4 = -1.0 / 3628800.0; // 1 / 10!

    const double S1 = -1.0 / 6.0; // 1 / 3!
    const double S2 = +1.0 / 120.0; // 1 / 5!
    const double S3 = -1.0 / 5040.0; // 1 / 7!
    const double S4 = +1.0 / 362880.0; // 1 / 9!

    private static readonly long[] PiBits = new long[]
    {
    0,
    5215,
    13000023176,
    11362338026,
    67174558139,
    34819822259,
    10612056195,
    67816420731,
    57840157550,
    19558516809,
    50025467026,
    25186875954,
    18152700886
    };

    public static float Sin(float x)
    {
    return SoftwareFallback(x);

    static float SoftwareFallback(float x)
    {
    double result = x;

    if (float.IsFinite(x))
    {
    double ax = Math.Abs(x);

    if (ax <= PiOverFour)
    {
    if (ax >= TwoPowNegSeven)
    {
    result = SinTaylorSeriesFourIterations(x);
    }
    else if (ax >= TwoPowNegThirteen)
    {
    result = SinTaylorSeriesOneIteration(x);
    }
    else
    {
    result = x;
    }
    }
    else
    {
    if (float.IsNegative(x))
    {
    x = -x;
    }

    if (x < 16000000.0)
    {
    // Reduce x to be in the range of -(PI / 4) to (PI / 4), inclusive

    // This is done by subtracting multiples of (PI / 2). Double-precision
    // isn't quite accurate enough and introduces some error, but we account
    // for that using a tail value that helps account for this.

    long axExp = BitConverter.DoubleToInt64Bits(ax) >> 52;

    int region = (int)(x * TwoOverPi + 0.5);
    double piOverTwoCount = region;

    double rHead = x - (piOverTwoCount * PiOverTwoPartOne);
    double rTail = (piOverTwoCount * PiOverTwoPartOneTail);

    double r = rHead - rTail;
    long rExp = (BitConverter.DoubleToInt64Bits(r) << 1) >> 53;

    if ((axExp - rExp) > 15)
    {
    // The remainder is pretty small compared with x, which implies that x is
    // near a multiple of (PI / 2). That is, x matches the multiple to at least
    // 15 bits and so we perform an additional fixup to account for any error

    r = rHead;

    rTail = (piOverTwoCount * PiOverTwoPartTwo);
    rHead = r - rTail;
    rTail = (piOverTwoCount * PiOverTwoPartTwoTail) - ((r - rHead) - rTail);

    r = rHead - rTail;
    }

    if (rExp >= 0x3F2) // r >= 2^-13
    {
    if ((region & 1) == 0) // region 0 or 2
    {
    result = SinTaylorSeriesFourIterations(r);
    }
    else // region 1 or 3
    {
    result = CosTaylorSeriesFourIterations(r);
    }
    }
    else if (rExp > 0x3DE) // r > 1.1641532182693481E-10
    {
    if ((region & 1) == 0) // region 0 or 2
    {
    result = SinTaylorSeriesOneIteration(r);
    }
    else // region 1 or 3
    {
    result = CosTaylorSeriesOneIteration(r);

    }
    }
    else
    {
    if ((region & 1) == 0) // region 0 or 2
    {
    result = r;
    }
    else // region 1 or 3
    {
    result = 1;
    }
    }
    }
    else
    {
    double r = ReduceForLargeInput(x, out int region);

    if ((region & 1) == 0) // region 0 or 2
    {
    result = SinTaylorSeriesFourIterations(r);
    }
    else // region 1 or 3
    {
    result = CosTaylorSeriesFourIterations(r);
    }
    }

    // Lsinf_sse2_adjust_region
    }
    }

    return (float)result;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static double CosTaylorSeriesOneIteration(double x1)
    {
    // 1 - (x^2 / 2!)
    double x2 = x1 * x1;
    return 1.0 + (x2 * C1);
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static double CosTaylorSeriesFourIterations(double x1)
    {
    // 1 - (x^2 / 2!) + (x^4 / 4!) - (x^6 / 6!) + (x^8 / 8!) - (x^10 / 10!)

    double x2 = x1 * x1;
    double x4 = x2 * x2;

    return 1.0 + (x2 * C0) + (x4 * ((C1 + (x2 * C2)) + (x4 * (C3 + (x2 * C4)))));
    }

    static unsafe double ReduceForLargeInput(double x, out int region)
    {
    Debug.Assert(!double.IsNegative(x));

    // This method simulates multi-precision floating-point
    // arithmetic and is accurate for all 1 <= x < infinity

    const int BitsPerIteration = 36;
    long ux = BitConverter.DoubleToInt64Bits(x);

    int xExp = (int)(((ux & 0x7FF0000000000000) >> 52) - 1023);
    ux = ((ux & 0x000FFFFFFFFFFFFF) | 0x0010000000000000) >> 29;

    // Now ux is the mantissa bit pattern of x as a long integer
    long mask = 1;
    mask = (mask << BitsPerIteration) - 1;

    // Set first and last to the positions of the first and last chunks of (2 / PI) that we need
    int first = xExp / BitsPerIteration;
    int resultExp = xExp - (first * BitsPerIteration);

    // 120 is the theoretical maximum number of bits (actually
    // 115 for IEEE single precision) that we need to extract
    // from the middle of (2 / PI) to compute the reduced argument
    // accurately enough for our purposes

    int last = first + (120 / BitsPerIteration);

    // Unroll the loop. This is only correct because we know that bitsper is fixed as 36.

    long* result = stackalloc long[10];
    long u, carry;

    result[4] = 0;
    u = PiBits[last] * ux;

    result[3] = u & mask;
    carry = u >> BitsPerIteration;
    u = PiBits[last - 1] * ux + carry;

    result[2] = u & mask;
    carry = u >> BitsPerIteration;
    u = PiBits[last - 2] * ux + carry;

    result[1] = u & mask;
    carry = u >> BitsPerIteration;
    u = PiBits[first] * ux + carry;

    result[0] = u & mask;

    // Reconstruct the result
    int ltb = (int)((((result[0] << BitsPerIteration) | result[1]) >> (BitsPerIteration - 1 - resultExp)) & 7);

    long mantissa;
    long nextBits;

    // determ says whether the fractional part is >= 0.5
    bool determ = (ltb & 1) != 0;

    int i = 1;

    if (determ)
    {
    // The mantissa is >= 0.5. We want to subtract it from 1.0 by negating all the bits
    region = ((ltb >> 1) + 1) & 3;

    mantissa = 1;
    mantissa = ~(result[1]) & ((mantissa << (BitsPerIteration - resultExp)) - 1);

    while (mantissa < 0x0000000000010000)
    {
    i++;
    mantissa = (mantissa << BitsPerIteration) | (~(result[i]) & mask);
    }

    nextBits = (~(result[i + 1]) & mask);
    }
    else
    {
    region = (ltb >> 1);

    mantissa = 1;
    mantissa = result[1] & ((mantissa << (BitsPerIteration - resultExp)) - 1);

    while (mantissa < 0x0000000000010000)
    {
    i++;
    mantissa = (mantissa << BitsPerIteration) | result[i];
    }

    nextBits = result[i + 1];
    }

    // Normalize the mantissa.
    // The shift value 6 here, determined by trial and error, seems to give optimal speed.

    int bc = 0;

    while (mantissa < 0x0000400000000000)
    {
    bc += 6;
    mantissa <<= 6;
    }

    while (mantissa < 0x0010000000000000)
    {
    bc++;
    mantissa <<= 1;
    }

    mantissa |= nextBits >> (BitsPerIteration - bc);

    int rExp = 52 + resultExp - bc - i * BitsPerIteration;

    // Put the result exponent rexp onto the mantissa pattern
    u = (rExp + 1023L) << 52;
    ux = (mantissa & 0x000FFFFFFFFFFFFF) | u;

    if (determ)
    {
    // If we negated the mantissa we negate x too
    ux |= unchecked((long)(0x8000000000000000));
    }

    x = BitConverter.Int64BitsToDouble(ux);

    // x is a double precision version of the fractional part of
    // (x * (2 / PI)). Multiply x by (PI / 2) in double precision
    // to get the reduced result.

    return x * PiOverTwo;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static double SinTaylorSeriesOneIteration(double x1)
    {
    // x - (x^3 / 3!)

    double x2 = x1 * x1;
    double x3 = x2 * x1;

    return x1 + (x3 * S1);
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static double SinTaylorSeriesFourIterations(double x1)
    {
    // x - (x^3 / 3!) + (x^5 / 5!) - (x^7 / 7!) + (x^9 / 9!)

    double x2 = x1 * x1;
    double x3 = x2 * x1;
    double x4 = x2 * x2;

    return x1 + ((S1 + (x2 * S2) + (x4 * (S3 + (x2 * S4)))) * x3);
    }
    }