mirror of
https://github.com/fluencelabs/musl
synced 2025-06-30 23:21:56 +00:00
math: use double_t for temporaries to avoid stores on i386
When FLT_EVAL_METHOD!=0 (only i386 with x87 fp), the excess precision of an expression must be removed in an assignment (gcc needs -fexcess-precision=standard or -std=c99 for this). This is done by extra load/store instructions, which add code bloat when a lot of temporaries are used and make the result less precise in many cases. Using double_t and float_t avoids these issues on i386 and makes no difference on other archs. For now only a few functions are modified, where the excess precision is clearly beneficial (mostly polynomial evaluations with temporaries).

Object size differences on i386, gcc-4.8:

              old    new
__cosdf.o     123     95
__cos.o       199    169
__sindf.o     131     95
__sin.o       225    203
__tandf.o     207    151
__tan.o       605    499
erff.o       1470   1416
erf.o        1703   1649
j0f.o        1779   1745
j0.o         2308   2274
j1f.o        1602   1568
j1.o         2286   2252
tgamma.o     1431   1424
math/*.o    64164  63635
This commit is contained in:
@ -60,7 +60,7 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
|
|||||||
|
|
||||||
double __cos(double x, double y)
|
double __cos(double x, double y)
|
||||||
{
|
{
|
||||||
double hz,z,r,w;
|
double_t hz,z,r,w;
|
||||||
|
|
||||||
z = x*x;
|
z = x*x;
|
||||||
w = z*z;
|
w = z*z;
|
||||||
|
@ -25,7 +25,7 @@ C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */
|
|||||||
|
|
||||||
float __cosdf(double x)
|
float __cosdf(double x)
|
||||||
{
|
{
|
||||||
double r, w, z;
|
double_t r, w, z;
|
||||||
|
|
||||||
/* Try to optimize for parallel evaluation as in __tandf.c. */
|
/* Try to optimize for parallel evaluation as in __tandf.c. */
|
||||||
z = x*x;
|
z = x*x;
|
||||||
|
@ -81,7 +81,7 @@ Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
|
|||||||
*/
|
*/
|
||||||
static inline double __log1p(double f)
|
static inline double __log1p(double f)
|
||||||
{
|
{
|
||||||
double hfsq,s,z,R,w,t1,t2;
|
double_t hfsq,s,z,R,w,t1,t2;
|
||||||
|
|
||||||
s = f/(2.0+f);
|
s = f/(2.0+f);
|
||||||
z = s*s;
|
z = s*s;
|
||||||
|
@ -22,7 +22,7 @@ Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
|
|||||||
|
|
||||||
static inline float __log1pf(float f)
|
static inline float __log1pf(float f)
|
||||||
{
|
{
|
||||||
float hfsq,s,z,R,w,t1,t2;
|
float_t hfsq,s,z,R,w,t1,t2;
|
||||||
|
|
||||||
s = f/(2.0f + f);
|
s = f/(2.0f + f);
|
||||||
z = s*s;
|
z = s*s;
|
||||||
|
@ -51,7 +51,7 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
|
|||||||
|
|
||||||
double __sin(double x, double y, int iy)
|
double __sin(double x, double y, int iy)
|
||||||
{
|
{
|
||||||
double z,r,v,w;
|
double_t z,r,v,w;
|
||||||
|
|
||||||
z = x*x;
|
z = x*x;
|
||||||
w = z*z;
|
w = z*z;
|
||||||
|
@ -25,7 +25,7 @@ S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */
|
|||||||
|
|
||||||
float __sindf(double x)
|
float __sindf(double x)
|
||||||
{
|
{
|
||||||
double r, s, w, z;
|
double_t r, s, w, z;
|
||||||
|
|
||||||
/* Try to optimize for parallel evaluation as in __tandf.c. */
|
/* Try to optimize for parallel evaluation as in __tandf.c. */
|
||||||
z = x*x;
|
z = x*x;
|
||||||
|
@ -65,7 +65,7 @@ pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
|
|||||||
|
|
||||||
double __tan(double x, double y, int iy)
|
double __tan(double x, double y, int iy)
|
||||||
{
|
{
|
||||||
double z, r, v, w, s, sign;
|
double_t z, r, v, w, s, sign;
|
||||||
int32_t ix, hx;
|
int32_t ix, hx;
|
||||||
|
|
||||||
GET_HIGH_WORD(hx,x);
|
GET_HIGH_WORD(hx,x);
|
||||||
@ -106,7 +106,8 @@ double __tan(double x, double y, int iy)
|
|||||||
* -1.0 / (x+r) here
|
* -1.0 / (x+r) here
|
||||||
*/
|
*/
|
||||||
/* compute -1.0 / (x+r) accurately */
|
/* compute -1.0 / (x+r) accurately */
|
||||||
double a, t;
|
double_t a;
|
||||||
|
double z, t;
|
||||||
z = w;
|
z = w;
|
||||||
SET_LOW_WORD(z,0);
|
SET_LOW_WORD(z,0);
|
||||||
v = r - (z - x); /* z+v = r+x */
|
v = r - (z - x); /* z+v = r+x */
|
||||||
|
@ -27,7 +27,7 @@ static const double T[] = {
|
|||||||
|
|
||||||
float __tandf(double x, int iy)
|
float __tandf(double x, int iy)
|
||||||
{
|
{
|
||||||
double z,r,w,s,t,u;
|
double_t z,r,w,s,t,u;
|
||||||
|
|
||||||
z = x*x;
|
z = x*x;
|
||||||
/*
|
/*
|
||||||
|
@ -51,7 +51,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
|
|||||||
|
|
||||||
static double R(double z)
|
static double R(double z)
|
||||||
{
|
{
|
||||||
double p, q;
|
double_t p, q;
|
||||||
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
||||||
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
||||||
return p/q;
|
return p/q;
|
||||||
|
@ -25,7 +25,7 @@ qS1 = -7.0662963390e-01;
|
|||||||
|
|
||||||
static float R(float z)
|
static float R(float z)
|
||||||
{
|
{
|
||||||
float p, q;
|
float_t p, q;
|
||||||
p = z*(pS0+z*(pS1+z*pS2));
|
p = z*(pS0+z*(pS1+z*pS2));
|
||||||
q = 1.0f+z*qS1;
|
q = 1.0f+z*qS1;
|
||||||
return p/q;
|
return p/q;
|
||||||
|
@ -58,7 +58,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
|
|||||||
|
|
||||||
static double R(double z)
|
static double R(double z)
|
||||||
{
|
{
|
||||||
double p, q;
|
double_t p, q;
|
||||||
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
||||||
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
||||||
return p/q;
|
return p/q;
|
||||||
|
@ -26,7 +26,7 @@ qS1 = -7.0662963390e-01;
|
|||||||
|
|
||||||
static float R(float z)
|
static float R(float z)
|
||||||
{
|
{
|
||||||
float p, q;
|
float_t p, q;
|
||||||
p = z*(pS0+z*(pS1+z*pS2));
|
p = z*(pS0+z*(pS1+z*pS2));
|
||||||
q = 1.0f+z*qS1;
|
q = 1.0f+z*qS1;
|
||||||
return p/q;
|
return p/q;
|
||||||
|
@ -62,7 +62,7 @@ static const double aT[] = {
|
|||||||
|
|
||||||
double atan(double x)
|
double atan(double x)
|
||||||
{
|
{
|
||||||
double w,s1,s2,z;
|
double_t w,s1,s2,z;
|
||||||
uint32_t ix,sign;
|
uint32_t ix,sign;
|
||||||
int id;
|
int id;
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ static const float aT[] = {
|
|||||||
|
|
||||||
float atanf(float x)
|
float atanf(float x)
|
||||||
{
|
{
|
||||||
float w,s1,s2,z;
|
float_t w,s1,s2,z;
|
||||||
uint32_t ix,sign;
|
uint32_t ix,sign;
|
||||||
int id;
|
int id;
|
||||||
|
|
||||||
|
@ -176,7 +176,7 @@ sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
|
|||||||
|
|
||||||
static double erfc1(double x)
|
static double erfc1(double x)
|
||||||
{
|
{
|
||||||
double s,P,Q;
|
double_t s,P,Q;
|
||||||
|
|
||||||
s = fabs(x) - 1;
|
s = fabs(x) - 1;
|
||||||
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
|
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
|
||||||
@ -186,7 +186,8 @@ static double erfc1(double x)
|
|||||||
|
|
||||||
static double erfc2(uint32_t ix, double x)
|
static double erfc2(uint32_t ix, double x)
|
||||||
{
|
{
|
||||||
double s,z,R,S;
|
double_t s,R,S;
|
||||||
|
double z;
|
||||||
|
|
||||||
if (ix < 0x3ff40000) /* |x| < 1.25 */
|
if (ix < 0x3ff40000) /* |x| < 1.25 */
|
||||||
return erfc1(x);
|
return erfc1(x);
|
||||||
|
@ -86,7 +86,7 @@ sb7 = -2.2440952301e+01; /* 0xc1b38712 */
|
|||||||
|
|
||||||
static float erfc1(float x)
|
static float erfc1(float x)
|
||||||
{
|
{
|
||||||
float s,P,Q;
|
float_t s,P,Q;
|
||||||
|
|
||||||
s = fabsf(x) - 1;
|
s = fabsf(x) - 1;
|
||||||
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
|
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
|
||||||
@ -96,7 +96,8 @@ static float erfc1(float x)
|
|||||||
|
|
||||||
static float erfc2(uint32_t ix, float x)
|
static float erfc2(uint32_t ix, float x)
|
||||||
{
|
{
|
||||||
float s,z,R,S;
|
float_t s,R,S;
|
||||||
|
float z;
|
||||||
|
|
||||||
if (ix < 0x3fa00000) /* |x| < 1.25 */
|
if (ix < 0x3fa00000) /* |x| < 1.25 */
|
||||||
return erfc1(x);
|
return erfc1(x);
|
||||||
|
@ -263,7 +263,7 @@ static const double pS2[5] = {
|
|||||||
static double pzero(double x)
|
static double pzero(double x)
|
||||||
{
|
{
|
||||||
const double *p,*q;
|
const double *p,*q;
|
||||||
double z,r,s;
|
double_t z,r,s;
|
||||||
uint32_t ix;
|
uint32_t ix;
|
||||||
|
|
||||||
GET_HIGH_WORD(ix, x);
|
GET_HIGH_WORD(ix, x);
|
||||||
@ -359,7 +359,7 @@ static const double qS2[6] = {
|
|||||||
static double qzero(double x)
|
static double qzero(double x)
|
||||||
{
|
{
|
||||||
const double *p,*q;
|
const double *p,*q;
|
||||||
double s,r,z;
|
double_t s,r,z;
|
||||||
uint32_t ix;
|
uint32_t ix;
|
||||||
|
|
||||||
GET_HIGH_WORD(ix, x);
|
GET_HIGH_WORD(ix, x);
|
||||||
|
@ -201,7 +201,7 @@ static const float pS2[5] = {
|
|||||||
static float pzerof(float x)
|
static float pzerof(float x)
|
||||||
{
|
{
|
||||||
const float *p,*q;
|
const float *p,*q;
|
||||||
float z,r,s;
|
float_t z,r,s;
|
||||||
uint32_t ix;
|
uint32_t ix;
|
||||||
|
|
||||||
GET_FLOAT_WORD(ix, x);
|
GET_FLOAT_WORD(ix, x);
|
||||||
@ -297,7 +297,7 @@ static const float qS2[6] = {
|
|||||||
static float qzerof(float x)
|
static float qzerof(float x)
|
||||||
{
|
{
|
||||||
const float *p,*q;
|
const float *p,*q;
|
||||||
float s,r,z;
|
float_t s,r,z;
|
||||||
uint32_t ix;
|
uint32_t ix;
|
||||||
|
|
||||||
GET_FLOAT_WORD(ix, x);
|
GET_FLOAT_WORD(ix, x);
|
||||||
|
@ -250,7 +250,7 @@ static const double ps2[5] = {
|
|||||||
static double pone(double x)
|
static double pone(double x)
|
||||||
{
|
{
|
||||||
const double *p,*q;
|
const double *p,*q;
|
||||||
double z,r,s;
|
double_t z,r,s;
|
||||||
uint32_t ix;
|
uint32_t ix;
|
||||||
|
|
||||||
GET_HIGH_WORD(ix, x);
|
GET_HIGH_WORD(ix, x);
|
||||||
@ -346,7 +346,7 @@ static const double qs2[6] = {
|
|||||||
static double qone(double x)
|
static double qone(double x)
|
||||||
{
|
{
|
||||||
const double *p,*q;
|
const double *p,*q;
|
||||||
double s,r,z;
|
double_t s,r,z;
|
||||||
uint32_t ix;
|
uint32_t ix;
|
||||||
|
|
||||||
GET_HIGH_WORD(ix, x);
|
GET_HIGH_WORD(ix, x);
|
||||||
|
@ -198,7 +198,7 @@ static const float ps2[5] = {
|
|||||||
static float ponef(float x)
|
static float ponef(float x)
|
||||||
{
|
{
|
||||||
const float *p,*q;
|
const float *p,*q;
|
||||||
float z,r,s;
|
float_t z,r,s;
|
||||||
uint32_t ix;
|
uint32_t ix;
|
||||||
|
|
||||||
GET_FLOAT_WORD(ix, x);
|
GET_FLOAT_WORD(ix, x);
|
||||||
@ -294,7 +294,7 @@ static const float qs2[6] = {
|
|||||||
static float qonef(float x)
|
static float qonef(float x)
|
||||||
{
|
{
|
||||||
const float *p,*q;
|
const float *p,*q;
|
||||||
float s,r,z;
|
float_t s,r,z;
|
||||||
uint32_t ix;
|
uint32_t ix;
|
||||||
|
|
||||||
GET_FLOAT_WORD(ix, x);
|
GET_FLOAT_WORD(ix, x);
|
||||||
|
@ -89,7 +89,7 @@ static const double fact[] = {
|
|||||||
/* S(x) rational function for positive x */
|
/* S(x) rational function for positive x */
|
||||||
static double S(double x)
|
static double S(double x)
|
||||||
{
|
{
|
||||||
double num = 0, den = 0;
|
double_t num = 0, den = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* to avoid overflow handle large x differently */
|
/* to avoid overflow handle large x differently */
|
||||||
|
Reference in New Issue
Block a user