// This is a port of the amd/win-libm implementation provided in assembly here: https://github.com/amd/win-libm/blob/master/sinf.asm // The original source is Copyright (c) 2002-2019 Advanced Micro Devices, Inc. and provided under the MIT License. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this Software and associated documentaon files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. const double PiOverTwo = 1.5707963267948966; const double PiOverTwoPartOne = 1.5707963267341256; const double PiOverTwoPartOneTail = 6.077100506506192E-11; const double PiOverTwoPartTwo = 6.077100506303966E-11; const double PiOverTwoPartTwoTail = 2.0222662487959506E-21; const double PiOverFour = 0.7853981633974483; const double TwoOverPi = 0.6366197723675814; const double TwoPowNegSeven = 0.0078125; const double TwoPowNegThirteen = 0.0001220703125; const double C0 = -1.0 / 2.0; // 1 / 2! const double C1 = +1.0 / 24.0; // 1 / 4! const double C2 = -1.0 / 720.0; // 1 / 6! const double C3 = +1.0 / 40320.0; // 1 / 8! const double C4 = -1.0 / 3628800.0; // 1 / 10! const double S1 = -1.0 / 6.0; // 1 / 3! const double S2 = +1.0 / 120.0; // 1 / 5! const double S3 = -1.0 / 5040.0; // 1 / 7! const double S4 = +1.0 / 362880.0; // 1 / 9! private static readonly long[] PiBits = new long[] { 0, 5215, 13000023176, 11362338026, 67174558139, 34819822259, 10612056195, 67816420731, 57840157550, 19558516809, 50025467026, 25186875954, 18152700886 }; public static float Sin(float x) { double result = x; if (float.IsFinite(x)) { double ax = Math.Abs(x); if (ax <= PiOverFour) { if (ax >= TwoPowNegSeven) { result = SinTaylorSeriesFourIterations(x); } else if (ax >= TwoPowNegThirteen) { result = SinTaylorSeriesOneIteration(x); } else { result = x; } } else { int wasNegative = 0; if (float.IsNegative(x)) { x = -x; wasNegative = 1; } int region; if (x < 16000000.0) { // Reduce x to be in the range of -(PI / 4) to (PI / 4), inclusive // This is done by subtracting multiples of (PI / 2). Double-precision // isn't quite accurate enough and introduces some error, but we account // for that using a tail value that helps account for this. long axExp = BitConverter.DoubleToInt64Bits(ax) >> 52; region = (int)(x * TwoOverPi + 0.5); double piOverTwoCount = region; double rHead = x - (piOverTwoCount * PiOverTwoPartOne); double rTail = (piOverTwoCount * PiOverTwoPartOneTail); double r = rHead - rTail; long rExp = (BitConverter.DoubleToInt64Bits(r) << 1) >> 53; if ((axExp - rExp) > 15) { // The remainder is pretty small compared with x, which implies that x is // near a multiple of (PI / 2). That is, x matches the multiple to at least // 15 bits and so we perform an additional fixup to account for any error r = rHead; rTail = (piOverTwoCount * PiOverTwoPartTwo); rHead = r - rTail; rTail = (piOverTwoCount * PiOverTwoPartTwoTail) - ((r - rHead) - rTail); r = rHead - rTail; } if (rExp >= 0x3F2) // r >= 2^-13 { if ((region & 1) == 0) // region 0 or 2 { result = SinTaylorSeriesFourIterations(r); } else // region 1 or 3 { result = CosTaylorSeriesFourIterations(r); } } else if (rExp > 0x3DE) // r > 1.1641532182693481E-10 { if ((region & 1) == 0) // region 0 or 2 { result = SinTaylorSeriesOneIteration(r); } else // region 1 or 3 { result = CosTaylorSeriesOneIteration(r); } } else { if ((region & 1) == 0) // region 0 or 2 { result = r; } else // region 1 or 3 { result = 1; } } } else { double r = ReduceForLargeInput(x, out region); if ((region & 1) == 0) // region 0 or 2 { result = SinTaylorSeriesFourIterations(r); } else // region 1 or 3 { result = CosTaylorSeriesFourIterations(r); } } region >>= 1; int tmp1 = region & wasNegative; region = ~region; wasNegative = ~wasNegative; int tmp2 = region & wasNegative; if (((tmp1 | tmp2) & 1) == 0) { // If the original region was 0/1 and arg is negative, then we negate the result. // -or- // If the original region was 2/3 and arg is positive, then we negate the result. result = -result; } } } return (float)result; [MethodImpl(MethodImplOptions.AggressiveInlining)] static double CosTaylorSeriesOneIteration(double x1) { // 1 - (x^2 / 2!) double x2 = x1 * x1; return 1.0 + (x2 * C1); } [MethodImpl(MethodImplOptions.AggressiveInlining)] static double CosTaylorSeriesFourIterations(double x1) { // 1 - (x^2 / 2!) + (x^4 / 4!) - (x^6 / 6!) + (x^8 / 8!) - (x^10 / 10!) double x2 = x1 * x1; double x4 = x2 * x2; return 1.0 + (x2 * C0) + (x4 * ((C1 + (x2 * C2)) + (x4 * (C3 + (x2 * C4))))); } static unsafe double ReduceForLargeInput(double x, out int region) { Debug.Assert(!double.IsNegative(x)); // This method simulates multi-precision floating-point // arithmetic and is accurate for all 1 <= x < infinity const int BitsPerIteration = 36; long ux = BitConverter.DoubleToInt64Bits(x); int xExp = (int)(((ux & 0x7FF0000000000000) >> 52) - 1023); ux = ((ux & 0x000FFFFFFFFFFFFF) | 0x0010000000000000) >> 29; // Now ux is the mantissa bit pattern of x as a long integer long mask = 1; mask = (mask << BitsPerIteration) - 1; // Set first and last to the positions of the first and last chunks of (2 / PI) that we need int first = xExp / BitsPerIteration; int resultExp = xExp - (first * BitsPerIteration); // 120 is the theoretical maximum number of bits (actually // 115 for IEEE single precision) that we need to extract // from the middle of (2 / PI) to compute the reduced argument // accurately enough for our purposes int last = first + (120 / BitsPerIteration); // Unroll the loop. This is only correct because we know that bitsper is fixed as 36. long* result = stackalloc long[10]; long u, carry; result[4] = 0; u = PiBits[last] * ux; result[3] = u & mask; carry = u >> BitsPerIteration; u = PiBits[last - 1] * ux + carry; result[2] = u & mask; carry = u >> BitsPerIteration; u = PiBits[last - 2] * ux + carry; result[1] = u & mask; carry = u >> BitsPerIteration; u = PiBits[first] * ux + carry; result[0] = u & mask; // Reconstruct the result int ltb = (int)((((result[0] << BitsPerIteration) | result[1]) >> (BitsPerIteration - 1 - resultExp)) & 7); long mantissa; long nextBits; // determ says whether the fractional part is >= 0.5 bool determ = (ltb & 1) != 0; int i = 1; if (determ) { // The mantissa is >= 0.5. We want to subtract it from 1.0 by negating all the bits region = ((ltb >> 1) + 1) & 3; mantissa = 1; mantissa = ~(result[1]) & ((mantissa << (BitsPerIteration - resultExp)) - 1); while (mantissa < 0x0000000000010000) { i++; mantissa = (mantissa << BitsPerIteration) | (~(result[i]) & mask); } nextBits = (~(result[i + 1]) & mask); } else { region = (ltb >> 1); mantissa = 1; mantissa = result[1] & ((mantissa << (BitsPerIteration - resultExp)) - 1); while (mantissa < 0x0000000000010000) { i++; mantissa = (mantissa << BitsPerIteration) | result[i]; } nextBits = result[i + 1]; } // Normalize the mantissa. // The shift value 6 here, determined by trial and error, seems to give optimal speed. int bc = 0; while (mantissa < 0x0000400000000000) { bc += 6; mantissa <<= 6; } while (mantissa < 0x0010000000000000) { bc++; mantissa <<= 1; } mantissa |= nextBits >> (BitsPerIteration - bc); int rExp = 52 + resultExp - bc - i * BitsPerIteration; // Put the result exponent rexp onto the mantissa pattern u = (rExp + 1023L) << 52; ux = (mantissa & 0x000FFFFFFFFFFFFF) | u; if (determ) { // If we negated the mantissa we negate x too ux |= unchecked((long)(0x8000000000000000)); } x = BitConverter.Int64BitsToDouble(ux); // x is a double precision version of the fractional part of // (x * (2 / PI)). Multiply x by (PI / 2) in double precision // to get the reduced result. return x * PiOverTwo; } [MethodImpl(MethodImplOptions.AggressiveInlining)] static double SinTaylorSeriesOneIteration(double x1) { // x - (x^3 / 3!) double x2 = x1 * x1; double x3 = x2 * x1; return x1 + (x3 * S1); } [MethodImpl(MethodImplOptions.AggressiveInlining)] static double SinTaylorSeriesFourIterations(double x1) { // x - (x^3 / 3!) + (x^5 / 5!) - (x^7 / 7!) + (x^9 / 9!) double x2 = x1 * x1; double x3 = x2 * x1; double x4 = x2 * x2; return x1 + ((S1 + (x2 * S2) + (x4 * (S3 + (x2 * S4)))) * x3); } }