mirror of
https://github.com/fluencelabs/musl
synced 2025-06-29 22:51:55 +00:00
* use unsigned arithmetic on the representation * store arg reduction quotient in unsigned (so n%2 would work like n&1) * use a different convention to pass the arg reduction bit to __tan (this argument used to be 1 for even and -1 for odd reduction, which meant obscure bithacks; the new n&1 is cleaner) * raise inexact and underflow flags correctly for small x (tanl(x) may still raise spurious underflow for small but normal x) (this exception-raising code increases code size a bit, similar fixes are needed in many other places, it may be worth investigating at some point whether the inexact and underflow flags are worth raising correctly, as this is not strictly required by the standard) * tanf manual reduction optimization is kept for now * tanl code path is cleaned up to follow similar logic to tan and tanf
55 lines
1.8 KiB
C
55 lines
1.8 KiB
C
/* origin: FreeBSD /usr/src/lib/msun/src/k_tanf.c */
/*
 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
 * Optimized by Bruce D. Evans.
 */
/*
 * ====================================================
 * Copyright 2004 Sun Microsystems, Inc. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#include "libm.h"
|
|
|
|
/*
 * Polynomial coefficients used by __tandf.  With z = x*x the approximated
 * function is
 *
 *   t(x) = 1 + T[0]*z + T[1]*z^2 + T[2]*z^3 + T[3]*z^4 + T[4]*z^5 + T[5]*z^6
 *
 * so that x*t(x) approximates tan(x):
 *
 *   |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]).
 *
 * Each entry is written as a hexadecimal floating constant; the decimal
 * value is given alongside for reference.
 */
static const double T[] = {
	0x15554d3418c99f.0p-54, /* x^3  term: 0.333331395030791399758 */
	0x1112fd38999f72.0p-55, /* x^5  term: 0.133392002712976742718 */
	0x1b54c91d865afe.0p-57, /* x^7  term: 0.0533812378445670393523 */
	0x191df3908c33ce.0p-58, /* x^9  term: 0.0245283181166547278873 */
	0x185dadfcecf44e.0p-61, /* x^11 term: 0.00297435743359967304927 */
	0x1362b9bf971bcd.0p-59, /* x^13 term: 0.00946564784943673166728 */
};

float __tandf(double x, int odd)
|
|
{
|
|
double_t z,r,w,s,t,u;
|
|
|
|
z = x*x;
|
|
/*
|
|
* Split up the polynomial into small independent terms to give
|
|
* opportunities for parallel evaluation. The chosen splitting is
|
|
* micro-optimized for Athlons (XP, X64). It costs 2 multiplications
|
|
* relative to Horner's method on sequential machines.
|
|
*
|
|
* We add the small terms from lowest degree up for efficiency on
|
|
* non-sequential machines (the lowest degree terms tend to be ready
|
|
* earlier). Apart from this, we don't care about order of
|
|
* operations, and don't need to to care since we have precision to
|
|
* spare. However, the chosen splitting is good for accuracy too,
|
|
* and would give results as accurate as Horner's method if the
|
|
* small terms were added from highest degree down.
|
|
*/
|
|
r = T[4] + z*T[5];
|
|
t = T[2] + z*T[3];
|
|
w = z*z;
|
|
s = z*x;
|
|
u = T[0] + z*T[1];
|
|
r = (x + s*u) + (s*w)*(t + w*r);
|
|
return odd ? -1.0/r : r;
|
|
}
|