mirror of
https://github.com/fluencelabs/musl
synced 2025-06-15 16:01:41 +00:00
optimize floatscan: avoid excessive upscaling
Upscaling by even one step too much creates 3-29 extra iterations for the next loop. This is still suboptimal, since it always upscales by 2^29 rather than using a smaller upscale factor when nearing the target, but performance on common, small-magnitude, few-digit values has already more than doubled with this change. More optimizations are on the way...
This commit is contained in:
@ -138,7 +138,33 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
|
|||||||
e2 = 0;
|
e2 = 0;
|
||||||
rp = lrp;
|
rp = lrp;
|
||||||
|
|
||||||
while (rp < 18+9*LD_B1B_DIG) {
|
if (rp % 9) {
|
||||||
|
static const int p10s[] = {
|
||||||
|
100000000, 10000000, 1000000, 100000,
|
||||||
|
10000, 1000, 100, 10
|
||||||
|
};
|
||||||
|
int rpm9 = rp>=0 ? rp%9 : rp%9+9;
|
||||||
|
int p10 = p10s[rpm9-1];
|
||||||
|
uint32_t carry = 0;
|
||||||
|
for (k=a; k!=z; k=(k+1 & MASK)) {
|
||||||
|
uint32_t tmp = x[k] % p10;
|
||||||
|
x[k] = x[k]/p10 + carry;
|
||||||
|
carry = 1000000000/p10 * tmp;
|
||||||
|
if (k==a && !x[k]) {
|
||||||
|
a = (a+1 & MASK);
|
||||||
|
rp -= 9;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (carry) {
|
||||||
|
if ((z+1 & MASK) != a) {
|
||||||
|
x[z] = carry;
|
||||||
|
z = (z+1 & MASK);
|
||||||
|
} else x[z-1 & MASK] |= 1;
|
||||||
|
}
|
||||||
|
rp += 9-rpm9;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (rp < 9*LD_B1B_DIG || (rp == 9*LD_B1B_DIG && x[0]<th[0])) {
|
||||||
uint32_t carry = 0;
|
uint32_t carry = 0;
|
||||||
e2 -= 29;
|
e2 -= 29;
|
||||||
for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
|
for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
|
||||||
@ -164,32 +190,6 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rp % 9) {
|
|
||||||
static const int p10s[] = {
|
|
||||||
100000000, 10000000, 1000000, 100000,
|
|
||||||
10000, 1000, 100, 10
|
|
||||||
};
|
|
||||||
int rpm9 = rp % 9;
|
|
||||||
int p10 = p10s[rpm9-1];
|
|
||||||
uint32_t carry = 0;
|
|
||||||
for (k=a; k!=z; k=(k+1 & MASK)) {
|
|
||||||
uint32_t tmp = x[k] % p10;
|
|
||||||
x[k] = x[k]/p10 + carry;
|
|
||||||
carry = 1000000000/p10 * tmp;
|
|
||||||
if (k==a && !x[k]) {
|
|
||||||
a = (a+1 & MASK);
|
|
||||||
rp -= 9;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (carry) {
|
|
||||||
if ((z+1 & MASK) != a) {
|
|
||||||
x[z] = carry;
|
|
||||||
z = (z+1 & MASK);
|
|
||||||
} else x[z-1 & MASK] |= 1;
|
|
||||||
}
|
|
||||||
rp += 9-rpm9;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
uint32_t carry = 0;
|
uint32_t carry = 0;
|
||||||
int sh = 1;
|
int sh = 1;
|
||||||
|
Reference in New Issue
Block a user