math: clean up atan2.c

* remove volatile hacks
* don't care about inexact flag for now (removed all the +-tiny)
* fix atanl to raise underflow properly
* remove signed int arithmetic
* use pi/2 instead of pi_o_2 (gcc generates the same code for both, which is
not strictly correct, but it does not matter: we mainly care about nearest rounding)
This commit is contained in:
Szabolcs Nagy 2013-08-15 14:05:19 +00:00
parent 31c5fb80b9
commit 6d85096f49
4 changed files with 73 additions and 103 deletions

View File

@ -39,75 +39,63 @@
#include "libm.h" #include "libm.h"
// FIXME
static const volatile double
tiny = 1.0e-300;
static const double static const double
pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */ pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
pi = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */
static const volatile double
pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
double atan2(double y, double x) double atan2(double y, double x)
{ {
double z; double z;
int32_t k,m,hx,hy,ix,iy; uint32_t m,lx,ly,ix,iy;
uint32_t lx,ly;
EXTRACT_WORDS(hx, lx, x); if (isnan(x) || isnan(y))
ix = hx & 0x7fffffff;
EXTRACT_WORDS(hy, ly, y);
iy = hy & 0x7fffffff;
if ((ix|((lx|-lx)>>31)) > 0x7ff00000 ||
(iy|((ly|-ly)>>31)) > 0x7ff00000) /* x or y is NaN */
return x+y; return x+y;
if ((hx-0x3ff00000 | lx) == 0) /* x = 1.0 */ EXTRACT_WORDS(ix, lx, x);
EXTRACT_WORDS(iy, ly, y);
if ((ix-0x3ff00000 | lx) == 0) /* x = 1.0 */
return atan(y); return atan(y);
m = ((hy>>31)&1) | ((hx>>30)&2); /* 2*sign(x)+sign(y) */ m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
ix = ix & 0x7fffffff;
iy = iy & 0x7fffffff;
/* when y = 0 */ /* when y = 0 */
if ((iy|ly) == 0) { if ((iy|ly) == 0) {
switch(m) { switch(m) {
case 0: case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */ case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi+tiny; /* atan(+0,-anything) = pi */ case 2: return pi; /* atan(+0,-anything) = pi */
case 3: return -pi-tiny; /* atan(-0,-anything) =-pi */ case 3: return -pi; /* atan(-0,-anything) =-pi */
} }
} }
/* when x = 0 */ /* when x = 0 */
if ((ix|lx) == 0) if ((ix|lx) == 0)
return hy < 0 ? -pi_o_2-tiny : pi_o_2+tiny; return m&1 ? -pi/2 : pi/2;
/* when x is INF */ /* when x is INF */
if (ix == 0x7ff00000) { if (ix == 0x7ff00000) {
if (iy == 0x7ff00000) { if (iy == 0x7ff00000) {
switch(m) { switch(m) {
case 0: return pi_o_4+tiny; /* atan(+INF,+INF) */ case 0: return pi/4; /* atan(+INF,+INF) */
case 1: return -pi_o_4-tiny; /* atan(-INF,+INF) */ case 1: return -pi/4; /* atan(-INF,+INF) */
case 2: return 3.0*pi_o_4+tiny; /* atan(+INF,-INF) */ case 2: return 3*pi/4; /* atan(+INF,-INF) */
case 3: return -3.0*pi_o_4-tiny; /* atan(-INF,-INF) */ case 3: return -3*pi/4; /* atan(-INF,-INF) */
} }
} else { } else {
switch(m) { switch(m) {
case 0: return 0.0; /* atan(+...,+INF) */ case 0: return 0.0; /* atan(+...,+INF) */
case 1: return -0.0; /* atan(-...,+INF) */ case 1: return -0.0; /* atan(-...,+INF) */
case 2: return pi+tiny; /* atan(+...,-INF) */ case 2: return pi; /* atan(+...,-INF) */
case 3: return -pi-tiny; /* atan(-...,-INF) */ case 3: return -pi; /* atan(-...,-INF) */
} }
} }
} }
/* when y is INF */ /* |y/x| > 0x1p64 */
if (iy == 0x7ff00000) if (ix+(64<<20) < iy || iy == 0x7ff00000)
return hy < 0 ? -pi_o_2-tiny : pi_o_2+tiny; return m&1 ? -pi/2 : pi/2;
/* compute y/x */ /* z = atan(|y/x|) without spurious underflow */
k = (iy-ix)>>20; if ((m&2) && iy+(64<<20) < ix) /* |y/x| < 0x1p-64, x<0 */
if (k > 60) { /* |y/x| > 2**60 */ z = 0;
z = pi_o_2+0.5*pi_lo; else
m &= 1;
} else if (hx < 0 && k < -60) /* 0 > |y|/x > -2**-60 */
z = 0.0;
else /* safe to do y/x */
z = atan(fabs(y/x)); z = atan(fabs(y/x));
switch (m) { switch (m) {
case 0: return z; /* atan(+,+) */ case 0: return z; /* atan(+,+) */

View File

@ -15,72 +15,63 @@
#include "libm.h" #include "libm.h"
static const volatile float
tiny = 1.0e-30;
static const float static const float
pi_o_4 = 7.8539818525e-01, /* 0x3f490fdb */ pi = 3.1415927410e+00, /* 0x40490fdb */
pi_o_2 = 1.5707963705e+00, /* 0x3fc90fdb */
pi = 3.1415927410e+00; /* 0x40490fdb */
static const volatile float
pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */ pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */
float atan2f(float y, float x) float atan2f(float y, float x)
{ {
float z; float z;
int32_t k,m,hx,hy,ix,iy; uint32_t m,ix,iy;
GET_FLOAT_WORD(hx, x); if (isnan(x) || isnan(y))
ix = hx & 0x7fffffff;
GET_FLOAT_WORD(hy, y);
iy = hy & 0x7fffffff;
if (ix > 0x7f800000 || iy > 0x7f800000) /* x or y is NaN */
return x+y; return x+y;
if (hx == 0x3f800000) /* x=1.0 */ GET_FLOAT_WORD(ix, x);
GET_FLOAT_WORD(iy, y);
if (ix == 0x3f800000) /* x=1.0 */
return atanf(y); return atanf(y);
m = ((hy>>31)&1) | ((hx>>30)&2); /* 2*sign(x)+sign(y) */ m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
ix &= 0x7fffffff;
iy &= 0x7fffffff;
/* when y = 0 */ /* when y = 0 */
if (iy == 0) { if (iy == 0) {
switch (m) { switch (m) {
case 0: case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */ case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi+tiny; /* atan(+0,-anything) = pi */ case 2: return pi; /* atan(+0,-anything) = pi */
case 3: return -pi-tiny; /* atan(-0,-anything) =-pi */ case 3: return -pi; /* atan(-0,-anything) =-pi */
} }
} }
/* when x = 0 */ /* when x = 0 */
if (ix == 0) if (ix == 0)
return hy < 0 ? -pi_o_2-tiny : pi_o_2+tiny; return m&1 ? -pi/2 : pi/2;
/* when x is INF */ /* when x is INF */
if (ix == 0x7f800000) { if (ix == 0x7f800000) {
if (iy == 0x7f800000) { if (iy == 0x7f800000) {
switch (m) { switch (m) {
case 0: return pi_o_4+tiny; /* atan(+INF,+INF) */ case 0: return pi/4; /* atan(+INF,+INF) */
case 1: return -pi_o_4-tiny; /* atan(-INF,+INF) */ case 1: return -pi/4; /* atan(-INF,+INF) */
case 2: return 3.0f*pi_o_4+tiny; /*atan(+INF,-INF)*/ case 2: return 3*pi/4; /*atan(+INF,-INF)*/
case 3: return -3.0f*pi_o_4-tiny; /*atan(-INF,-INF)*/ case 3: return -3*pi/4; /*atan(-INF,-INF)*/
} }
} else { } else {
switch (m) { switch (m) {
case 0: return 0.0f; /* atan(+...,+INF) */ case 0: return 0.0f; /* atan(+...,+INF) */
case 1: return -0.0f; /* atan(-...,+INF) */ case 1: return -0.0f; /* atan(-...,+INF) */
case 2: return pi+tiny; /* atan(+...,-INF) */ case 2: return pi; /* atan(+...,-INF) */
case 3: return -pi-tiny; /* atan(-...,-INF) */ case 3: return -pi; /* atan(-...,-INF) */
} }
} }
} }
/* when y is INF */ /* |y/x| > 0x1p26 */
if (iy == 0x7f800000) if (ix+(26<<23) < iy || iy == 0x7f800000)
return hy < 0 ? -pi_o_2-tiny : pi_o_2+tiny; return m&1 ? -pi/2 : pi/2;
/* compute y/x */ /* z = atan(|y/x|) with correct underflow */
k = (iy-ix)>>23; if ((m&2) && iy+(26<<23) < ix) /*|y/x| < 0x1p-26, x < 0 */
if (k > 26) { /* |y/x| > 2**26 */
z = pi_o_2 + 0.5f*pi_lo;
m &= 1;
} else if (k < -26 && hx < 0) /* 0 > |y|/x > -2**-26 */
z = 0.0; z = 0.0;
else /* safe to do y/x */ else
z = atanf(fabsf(y/x)); z = atanf(fabsf(y/x));
switch (m) { switch (m) {
case 0: return z; /* atan(+,+) */ case 0: return z; /* atan(+,+) */

View File

@ -29,26 +29,22 @@ long double atan2l(long double y, long double x)
{ {
union IEEEl2bits ux, uy; union IEEEl2bits ux, uy;
long double z; long double z;
int32_t k,m; int m;
int16_t exptx, expsignx, expty, expsigny; uint16_t exptx, expsignx, expty, expsigny;
if (isnan(x) || isnan(y))
return x+y;
if (x == 1)
return atanl(y);
uy.e = y; uy.e = y;
expsigny = uy.xbits.expsign; expsigny = uy.xbits.expsign;
expty = expsigny & 0x7fff; expty = expsigny & 0x7fff;
ux.e = x; ux.e = x;
expsignx = ux.xbits.expsign; expsignx = ux.xbits.expsign;
exptx = expsignx & 0x7fff; exptx = expsignx & 0x7fff;
if ((exptx==0x7fff &&
((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)!=0) || /* x is NaN */
(expty==0x7fff &&
((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)!=0)) /* y is NaN */
return x+y;
if (expsignx==0x3fff && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */
return atanl(y);
m = ((expsigny>>15)&1) | ((expsignx>>14)&2); /* 2*sign(x)+sign(y) */ m = ((expsigny>>15)&1) | ((expsignx>>14)&2); /* 2*sign(x)+sign(y) */
/* when y = 0 */ if (y == 0) {
if (expty==0 && ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)==0) {
switch(m) { switch(m) {
case 0: case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */ case 1: return y; /* atan(+-0,+anything)=+-0 */
@ -56,9 +52,8 @@ long double atan2l(long double y, long double x)
case 3: return -2*pio2_hi-0x1p-120f; /* atan(-0,-anything) =-pi */ case 3: return -2*pio2_hi-0x1p-120f; /* atan(-0,-anything) =-pi */
} }
} }
/* when x = 0 */ if (x == 0)
if (exptx==0 && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) return m&1 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f;
return expsigny < 0 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f;
/* when x is INF */ /* when x is INF */
if (exptx == 0x7fff) { if (exptx == 0x7fff) {
if (expty == 0x7fff) { if (expty == 0x7fff) {
@ -78,17 +73,12 @@ long double atan2l(long double y, long double x)
} }
} }
/* when y is INF */ /* when y is INF */
if (expty == 0x7fff) if (exptx+120 < expty || expty == 0x7fff)
return expsigny < 0 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f; return m&1 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f;
/* compute y/x */ if ((m&2) && expty+120 < exptx) /* |y/x| tiny, x<0 */
k = expty-exptx;
if(k > LDBL_MANT_DIG+2) { /* |y/x| huge */
z = pio2_hi+0x1p-120f;
m &= 1;
} else if (expsignx < 0 && k < -LDBL_MANT_DIG-2) /* |y/x| tiny, x<0 */
z = 0.0; z = 0.0;
else /* safe to do y/x */ else
z = atanl(fabsl(y/x)); z = atanl(fabsl(y/x));
switch (m) { switch (m) {
case 0: return z; /* atan(+,+) */ case 0: return z; /* atan(+,+) */

View File

@ -70,8 +70,8 @@ long double atanl(long double x)
union IEEEl2bits u; union IEEEl2bits u;
long double w,s1,s2,z; long double w,s1,s2,z;
int id; int id;
int16_t expsign, expt; uint16_t expsign, expt;
int32_t expman; uint32_t expman;
u.e = x; u.e = x;
expsign = u.xbits.expsign; expsign = u.xbits.expsign;
@ -81,15 +81,16 @@ long double atanl(long double x)
((u.bits.manh&~LDBL_NBIT)|u.bits.manl)!=0) /* NaN */ ((u.bits.manh&~LDBL_NBIT)|u.bits.manl)!=0) /* NaN */
return x+x; return x+x;
z = atanhi[3] + 0x1p-120f; z = atanhi[3] + 0x1p-120f;
return expsign < 0 ? -z : z; return expsign>>15 ? -z : z;
} }
/* Extract the exponent and the first few bits of the mantissa. */ /* Extract the exponent and the first few bits of the mantissa. */
/* XXX There should be a more convenient way to do this. */ /* XXX There should be a more convenient way to do this. */
expman = (expt << 8) | ((u.bits.manh >> (LDBL_MANH_SIZE - 9)) & 0xff); expman = (expt << 8) | ((u.bits.manh >> (LDBL_MANH_SIZE - 9)) & 0xff);
if (expman < ((0x3fff - 2) << 8) + 0xc0) { /* |x| < 0.4375 */ if (expman < ((0x3fff - 2) << 8) + 0xc0) { /* |x| < 0.4375 */
if (expt < 0x3fff - 32) { /* if |x| is small, atanl(x)~=x */ if (expt < 0x3fff - 32) { /* if |x| is small, atanl(x)~=x */
/* raise inexact if x!=0 */ /* raise underflow if subnormal */
FORCE_EVAL(x + 0x1p120f); if (expt == 0)
FORCE_EVAL((float)x);
return x; return x;
} }
id = -1; id = -1;
@ -122,6 +123,6 @@ long double atanl(long double x)
if (id < 0) if (id < 0)
return x - x*(s1+s2); return x - x*(s1+s2);
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
return expsign < 0 ? -z : z; return expsign>>15 ? -z : z;
} }
#endif #endif