From 6228233e048381e5b627eff27fde95bb9428f020 Mon Sep 17 00:00:00 2001 From: Daniel Wirtz Date: Fri, 13 Jul 2018 21:45:39 +0200 Subject: [PATCH] Minor itoa improvements (#168) --- std/assembly/internal/itoa.ts | 104 +++++----- tests/compiler/std/string.optimized.wat | 71 ++----- tests/compiler/std/string.untouched.wat | 251 ++++++++++-------------- 3 files changed, 173 insertions(+), 253 deletions(-) diff --git a/std/assembly/internal/itoa.ts b/std/assembly/internal/itoa.ts index 97d114ae..f3be8b7b 100644 --- a/std/assembly/internal/itoa.ts +++ b/std/assembly/internal/itoa.ts @@ -1,15 +1,17 @@ import { CharCode, - allocate, + allocate as allocateString, HEADER_SIZE as STRING_HEADER_SIZE } from "./string"; -import { loadUnsafe } from "./arraybuffer"; +import { + loadUnsafe +} from "./arraybuffer"; @inline -function getPowers10Table(): u32[] { - return [ +function POWERS10(): u32[] { + const table: u32[] = [ 1, 10, 100, @@ -21,6 +23,7 @@ function getPowers10Table(): u32[] { 100000000, 1000000000 ]; + return table; // inlines to a constant memory offset } /* @@ -38,8 +41,8 @@ function getPowers10Table(): u32[] { "90", "91", "92", "93", "94", "95", "96", "97", "98", "99" */ @inline -function getDigitsTable(): u32[] { - return [ +function DIGITS(): u32[] { + const table: u32[] = [ 0x00300030, 0x00310030, 0x00320030, 0x00330030, 0x00340030, 0x00350030, 0x00360030, 0x00370030, 0x00380030, 0x00390030, 0x00300031, 0x00310031, 0x00320031, 0x00330031, 0x00340031, @@ -61,42 +64,40 @@ function getDigitsTable(): u32[] { 0x00300039, 0x00310039, 0x00320039, 0x00330039, 0x00340039, 0x00350039, 0x00360039, 0x00370039, 0x00380039, 0x00390039 ]; + return table; // inlines to a constant memory offset } // Count number of decimals in value -function decimalCount(value: T): i32 { - // make value abs - var sign = value >> (8 * sizeof() - 1); - var v = (value ^ sign) - sign; - var l = 8 * sizeof() - clz(v | 10); // log2 - var t = l * 1233 >>> 12; // log10 +export function decimalCount(value: T): i32 { + var v = abs(value); // NOP if value is unsigned anyway + var l: usize = 8 * sizeof() - clz(v | 10); // log2 + var t = l * 1233 >>> 12; // log10 - var lutbuf = changetype(getPowers10Table().buffer_); + var lutbuf = POWERS10().buffer_; if (sizeof() <= 4) { let power = loadUnsafe(lutbuf, t); - t -= (v < power); + t -= (v < power); } else { // sizeof() == 8 let le10 = t <= 10; - let offset = select(0, 10, le10); // offset = t <= 10 ? 0 : 10 + let offset = select(0, 10, le10); // offset = t <= 10 ? 0 : 10 let factor = select< T >(1, 10000000000, le10); // factor = t <= 10 ? 1 : 10 ^ 10 let power = loadUnsafe(lutbuf, t - offset); - t -= (v < factor * power); + t -= (v < factor * power); } return t + 1; } -function utoa32_lut(buffer: usize, num: u32, offset: u32): void { - var r: u32, t: u32, d1: u32, d2: u32; - var lutbuf = changetype(getDigitsTable().buffer_); +function utoa32_lut(buffer: usize, num: u32, offset: usize): void { + var lutbuf = DIGITS().buffer_; while (num >= 10000) { // in most VMs i32/u32 div and modulo by constant can be shared and simplificate - t = num / 10000; - r = num % 10000; + let t = num / 10000; + let r = num % 10000; num = t; - d1 = r / 100; - d2 = r % 100; + let d1 = r / 100; + let d2 = r % 100; let digits1 = loadUnsafe(lutbuf, d1); let digits2 = loadUnsafe(lutbuf, d2); @@ -106,8 +107,8 @@ function utoa32_lut(buffer: usize, num: u32, offset: u32): void { } if (num >= 100) { - t = num / 100; - d1 = num % 100; + let t = num / 100; + let d1 = num % 100; num = t; offset -= 2; let digits = loadUnsafe(lutbuf, d1); @@ -125,24 +126,21 @@ function utoa32_lut(buffer: usize, num: u32, offset: u32): void { } } -function utoa64_lut(buffer: usize, num: u64, offset: u32): void { - var t: u64, r: u32, b: u32, c: u32; - var b1: u32, b2: u32, c1: u32, c2: u32; - - var lutbuf = changetype(getDigitsTable().buffer_); +function utoa64_lut(buffer: usize, num: u64, offset: usize): void { + var lutbuf = DIGITS().buffer_; while (num >= 100000000) { - t = num / 100000000; - r = (num - t * 100000000); + let t = num / 100000000; + let r = (num - t * 100000000); num = t; - b = r / 10000; - c = r % 10000; + let b = r / 10000; + let c = r % 10000; - b1 = b / 100; - b2 = b % 100; - c1 = c / 100; - c2 = c % 100; + let b1 = b / 100; + let b2 = b % 100; + let c1 = c / 100; + let c2 = c % 100; let digits1 = loadUnsafe(lutbuf, c1); let digits2 = loadUnsafe(lutbuf, c2); @@ -157,15 +155,13 @@ function utoa64_lut(buffer: usize, num: u64, offset: u32): void { store(buffer + (offset << 1), digits1 | (digits2 << 32), STRING_HEADER_SIZE); } - r = num; - if (r) utoa32_lut(buffer, r, offset); + utoa32_lut(buffer, num, offset); } -function utoa_simple(buffer: usize, num: T, offset: u32): void { - var t: T, r: u32; +function utoa_simple(buffer: usize, num: T, offset: usize): void { do { - t = num / 10; - r = (num % 10); + let t = num / 10; + let r = (num % 10); num = t; offset -= 1; store(buffer + (offset << 1), CharCode._0 + r, STRING_HEADER_SIZE); @@ -175,18 +171,18 @@ function utoa_simple(buffer: usize, num: T, offset: u32): void { @inline export function utoa32_core(buffer: usize, num: u32, offset: u32): void { if (ASC_SHRINK_LEVEL >= 1) { - utoa_simple(buffer, num, offset); + utoa_simple(buffer, num, offset); } else { - utoa32_lut(buffer, num, offset); + utoa32_lut(buffer, num, offset); } } @inline export function utoa64_core(buffer: usize, num: u64, offset: u32): void { if (ASC_SHRINK_LEVEL >= 1) { - utoa_simple(buffer, num, offset); + utoa_simple(buffer, num, offset); } else { - utoa64_lut(buffer, num, offset); + utoa64_lut(buffer, num, offset); } } @@ -194,7 +190,7 @@ export function utoa32(value: u32): string { if (!value) return "0"; var decimals = decimalCount(value); - var buffer = allocate(decimals); + var buffer = allocateString(decimals); utoa32_core(changetype(buffer), value, decimals); return changetype(buffer); @@ -207,7 +203,7 @@ export function itoa32(value: i32): string { if (isneg) value = -value; var decimals = decimalCount(value) + isneg; - var buffer = allocate(decimals); + var buffer = allocateString(decimals); utoa32_core(changetype(buffer), value, decimals); if (isneg) store(changetype(buffer), CharCode.MINUS, STRING_HEADER_SIZE); @@ -222,11 +218,11 @@ export function utoa64(value: u64): string { if (value <= u32.MAX_VALUE) { let value32 = value; let decimals = decimalCount(value32); - buffer = allocate(decimals); + buffer = allocateString(decimals); utoa32_core(changetype(buffer), value32, decimals); } else { let decimals = decimalCount(value); - buffer = allocate(decimals); + buffer = allocateString(decimals); utoa64_core(changetype(buffer), value, decimals); } @@ -243,11 +239,11 @@ export function itoa64(value: i64): string { if (value <= u32.MAX_VALUE) { let value32 = value; let decimals = decimalCount(value32) + isneg; - buffer = allocate(decimals); + buffer = allocateString(decimals); utoa32_core(changetype(buffer), value32, decimals); } else { let decimals = decimalCount(value) + isneg; - buffer = allocate(decimals); + buffer = allocateString(decimals); utoa64_core(changetype(buffer), value, decimals); } if (isneg) store(changetype(buffer), CharCode.MINUS, STRING_HEADER_SIZE); diff --git a/tests/compiler/std/string.optimized.wat b/tests/compiler/std/string.optimized.wat index 0db031a2..eadfbcbb 100644 --- a/tests/compiler/std/string.optimized.wat +++ b/tests/compiler/std/string.optimized.wat @@ -3744,27 +3744,14 @@ (i32.const 552) ) (i32.shl - (tee_local $0 + (tee_local $1 (i32.shr_u (i32.mul (i32.sub (i32.const 32) (i32.clz (i32.or - (tee_local $1 - (i32.sub - (i32.xor - (get_local $0) - (tee_local $0 - (i32.shr_u - (get_local $0) - (i32.const 31) - ) - ) - ) - (get_local $0) - ) - ) + (get_local $0) (i32.const 10) ) ) @@ -3781,9 +3768,9 @@ ) (i32.add (i32.sub - (get_local $0) + (get_local $1) (i32.lt_u - (get_local $1) + (get_local $0) (get_local $2) ) ) @@ -4032,7 +4019,7 @@ (local $1 i32) (local $2 i32) (set_local $2 - (i32.le_s + (i32.le_u (tee_local $1 (i32.shr_u (i32.mul @@ -4041,20 +4028,7 @@ (i32.wrap/i64 (i64.clz (i64.or - (tee_local $0 - (i64.sub - (i64.xor - (get_local $0) - (tee_local $0 - (i64.shr_u - (get_local $0) - (i64.const 63) - ) - ) - ) - (get_local $0) - ) - ) + (get_local $0) (i64.const 10) ) ) @@ -4108,7 +4082,7 @@ (local $4 i32) (local $5 i32) (local $6 i32) - (set_local $4 + (set_local $3 (i32.load (i32.const 1472) ) @@ -4124,7 +4098,7 @@ (i32.div_u (tee_local $5 (i32.div_u - (tee_local $3 + (tee_local $4 (i32.wrap/i64 (i64.sub (get_local $1) @@ -4168,12 +4142,12 @@ (i64.or (i64.load32_u offset=8 (i32.add - (get_local $4) + (get_local $3) (i32.shl (i32.div_u - (tee_local $3 + (tee_local $4 (i32.rem_u - (get_local $3) + (get_local $4) (i32.const 10000) ) ) @@ -4186,10 +4160,10 @@ (i64.shl (i64.load32_u offset=8 (i32.add - (get_local $4) + (get_local $3) (i32.shl (i32.rem_u - (get_local $3) + (get_local $4) (i32.const 100) ) (i32.const 2) @@ -4216,7 +4190,7 @@ (i64.or (i64.load32_u offset=8 (i32.add - (get_local $4) + (get_local $3) (i32.shl (get_local $6) (i32.const 2) @@ -4226,7 +4200,7 @@ (i64.shl (i64.load32_u offset=8 (i32.add - (get_local $4) + (get_local $3) (i32.shl (get_local $5) (i32.const 2) @@ -4241,17 +4215,12 @@ ) ) ) - (if - (tee_local $3 - (i32.wrap/i64 - (get_local $1) - ) - ) - (call $~lib/internal/itoa/utoa32_lut - (get_local $0) - (get_local $3) - (get_local $2) + (call $~lib/internal/itoa/utoa32_lut + (get_local $0) + (i32.wrap/i64 + (get_local $1) ) + (get_local $2) ) ) (func $~lib/internal/itoa/utoa64 (; 33 ;) (type $Ii) (param $0 i64) (result i32) diff --git a/tests/compiler/std/string.untouched.wat b/tests/compiler/std/string.untouched.wat index 51065554..de70dfc8 100644 --- a/tests/compiler/std/string.untouched.wat +++ b/tests/compiler/std/string.untouched.wat @@ -4454,29 +4454,10 @@ (local $3 i32) (local $4 i32) (local $5 i32) - (local $6 i32) (set_local $1 - (i32.shr_u - (get_local $0) - (i32.sub - (i32.mul - (i32.const 8) - (i32.const 4) - ) - (i32.const 1) - ) - ) + (get_local $0) ) (set_local $2 - (i32.sub - (i32.xor - (get_local $0) - (get_local $1) - ) - (get_local $1) - ) - ) - (set_local $3 (i32.sub (i32.mul (i32.const 8) @@ -4484,52 +4465,52 @@ ) (i32.clz (i32.or - (get_local $2) + (get_local $1) (i32.const 10) ) ) ) ) - (set_local $4 + (set_local $3 (i32.shr_u (i32.mul - (get_local $3) + (get_local $2) (i32.const 1233) ) (i32.const 12) ) ) - (set_local $5 + (set_local $4 (i32.load - (block $~lib/internal/itoa/getPowers10Table|inlined.0 (result i32) + (block $~lib/internal/itoa/POWERS10|inlined.0 (result i32) (i32.const 552) ) ) ) - (set_local $6 + (set_local $5 (block $~lib/internal/arraybuffer/loadUnsafe|inlined.0 (result i32) (i32.load offset=8 (i32.add - (get_local $5) + (get_local $4) (i32.shl - (get_local $4) + (get_local $3) (i32.const 2) ) ) ) ) ) - (set_local $4 + (set_local $3 (i32.sub - (get_local $4) + (get_local $3) (i32.lt_u - (get_local $2) - (get_local $6) + (get_local $1) + (get_local $5) ) ) ) (i32.add - (get_local $4) + (get_local $3) (i32.const 1) ) ) @@ -4541,10 +4522,9 @@ (local $7 i32) (local $8 i64) (local $9 i64) - (local $10 i32) - (set_local $7 + (set_local $3 (i32.load - (block $~lib/internal/itoa/getDigitsTable|inlined.0 (result i32) + (block $~lib/internal/itoa/DIGITS|inlined.0 (result i32) (i32.const 624) ) ) @@ -4564,7 +4544,7 @@ (i32.const 10000) ) ) - (set_local $3 + (set_local $5 (i32.rem_u (get_local $1) (i32.const 10000) @@ -4573,15 +4553,15 @@ (set_local $1 (get_local $4) ) - (set_local $5 + (set_local $6 (i32.div_u - (get_local $3) + (get_local $5) (i32.const 100) ) ) - (set_local $6 + (set_local $7 (i32.rem_u - (get_local $3) + (get_local $5) (i32.const 100) ) ) @@ -4589,9 +4569,9 @@ (block $~lib/internal/arraybuffer/loadUnsafe|inlined.0 (result i64) (i64.load32_u offset=8 (i32.add - (get_local $7) + (get_local $3) (i32.shl - (get_local $5) + (get_local $6) (i32.const 2) ) ) @@ -4602,9 +4582,9 @@ (block $~lib/internal/arraybuffer/loadUnsafe|inlined.1 (result i64) (i64.load32_u offset=8 (i32.add - (get_local $7) + (get_local $3) (i32.shl - (get_local $6) + (get_local $7) (i32.const 2) ) ) @@ -4645,20 +4625,20 @@ (i32.const 100) ) (block - (set_local $4 + (set_local $7 (i32.div_u (get_local $1) (i32.const 100) ) ) - (set_local $5 + (set_local $6 (i32.rem_u (get_local $1) (i32.const 100) ) ) (set_local $1 - (get_local $4) + (get_local $7) ) (set_local $2 (i32.sub @@ -4666,13 +4646,13 @@ (i32.const 2) ) ) - (set_local $10 + (set_local $5 (block $~lib/internal/arraybuffer/loadUnsafe|inlined.1 (result i32) (i32.load offset=8 (i32.add - (get_local $7) + (get_local $3) (i32.shl - (get_local $5) + (get_local $6) (i32.const 2) ) ) @@ -4687,7 +4667,7 @@ (i32.const 1) ) ) - (get_local $10) + (get_local $5) ) ) ) @@ -4703,11 +4683,11 @@ (i32.const 2) ) ) - (set_local $10 + (set_local $5 (block $~lib/internal/arraybuffer/loadUnsafe|inlined.2 (result i32) (i32.load offset=8 (i32.add - (get_local $7) + (get_local $3) (i32.shl (get_local $1) (i32.const 2) @@ -4724,7 +4704,7 @@ (i32.const 1) ) ) - (get_local $10) + (get_local $5) ) ) (block @@ -4734,7 +4714,7 @@ (i32.const 1) ) ) - (set_local $10 + (set_local $5 (i32.add (i32.const 48) (get_local $1) @@ -4748,7 +4728,7 @@ (i32.const 1) ) ) - (get_local $10) + (get_local $5) ) ) ) @@ -4841,37 +4821,18 @@ ) (func $~lib/internal/itoa/decimalCount (; 31 ;) (type $Ii) (param $0 i64) (result i32) (local $1 i64) - (local $2 i64) + (local $2 i32) (local $3 i32) (local $4 i32) (local $5 i32) (local $6 i32) - (local $7 i32) - (local $8 i64) - (local $9 i32) - (local $10 i64) + (local $7 i64) + (local $8 i32) + (local $9 i64) (set_local $1 - (i64.shr_u - (get_local $0) - (i64.sub - (i64.mul - (i64.const 8) - (i64.const 8) - ) - (i64.const 1) - ) - ) + (get_local $0) ) (set_local $2 - (i64.sub - (i64.xor - (get_local $0) - (get_local $1) - ) - (get_local $1) - ) - ) - (set_local $3 (i32.sub (i32.mul (i32.const 8) @@ -4880,88 +4841,88 @@ (i32.wrap/i64 (i64.clz (i64.or - (get_local $2) + (get_local $1) (i64.const 10) ) ) ) ) ) - (set_local $4 + (set_local $3 (i32.shr_u (i32.mul - (get_local $3) + (get_local $2) (i32.const 1233) ) (i32.const 12) ) ) - (set_local $5 + (set_local $4 (i32.load - (block $~lib/internal/itoa/getPowers10Table|inlined.1 (result i32) + (block $~lib/internal/itoa/POWERS10|inlined.1 (result i32) (i32.const 1400) ) ) ) - (set_local $6 - (i32.le_s - (get_local $4) + (set_local $5 + (i32.le_u + (get_local $3) (i32.const 10) ) ) + (set_local $6 + (select + (i32.const 0) + (i32.const 10) + (get_local $5) + ) + ) (set_local $7 - (select - (i32.const 0) - (i32.const 10) - (get_local $6) - ) - ) - (set_local $8 (select (i64.const 1) (i64.const 10000000000) - (get_local $6) + (get_local $5) ) ) - (set_local $10 + (set_local $9 (block $~lib/internal/arraybuffer/loadUnsafe|inlined.2 (result i64) - (set_local $9 + (set_local $8 (i32.sub - (get_local $4) - (get_local $7) + (get_local $3) + (get_local $6) ) ) (i64.load32_u offset=8 (i32.add - (get_local $5) + (get_local $4) (i32.shl - (get_local $9) + (get_local $8) (i32.const 2) ) ) ) ) ) - (set_local $4 + (set_local $3 (i32.sub - (get_local $4) + (get_local $3) (i64.lt_u - (get_local $2) + (get_local $1) (i64.mul - (get_local $8) - (get_local $10) + (get_local $7) + (get_local $9) ) ) ) ) (i32.add - (get_local $4) + (get_local $3) (i32.const 1) ) ) (func $~lib/internal/itoa/utoa64_lut (; 32 ;) (type $iIiv) (param $0 i32) (param $1 i64) (param $2 i32) - (local $3 i64) - (local $4 i32) + (local $3 i32) + (local $4 i64) (local $5 i32) (local $6 i32) (local $7 i32) @@ -4971,9 +4932,9 @@ (local $11 i32) (local $12 i64) (local $13 i64) - (set_local $11 + (set_local $3 (i32.load - (block $~lib/internal/itoa/getDigitsTable|inlined.1 (result i32) + (block $~lib/internal/itoa/DIGITS|inlined.1 (result i32) (i32.const 1472) ) ) @@ -4987,59 +4948,59 @@ ) (block (block - (set_local $3 + (set_local $4 (i64.div_u (get_local $1) (i64.const 100000000) ) ) - (set_local $4 + (set_local $5 (i32.wrap/i64 (i64.sub (get_local $1) (i64.mul - (get_local $3) + (get_local $4) (i64.const 100000000) ) ) ) ) (set_local $1 - (get_local $3) - ) - (set_local $5 - (i32.div_u - (get_local $4) - (i32.const 10000) - ) + (get_local $4) ) (set_local $6 - (i32.rem_u - (get_local $4) + (i32.div_u + (get_local $5) (i32.const 10000) ) ) (set_local $7 - (i32.div_u + (i32.rem_u (get_local $5) - (i32.const 100) + (i32.const 10000) ) ) (set_local $8 - (i32.rem_u - (get_local $5) + (i32.div_u + (get_local $6) (i32.const 100) ) ) (set_local $9 - (i32.div_u + (i32.rem_u (get_local $6) (i32.const 100) ) ) (set_local $10 + (i32.div_u + (get_local $7) + (i32.const 100) + ) + ) + (set_local $11 (i32.rem_u - (get_local $6) + (get_local $7) (i32.const 100) ) ) @@ -5047,9 +5008,9 @@ (block $~lib/internal/arraybuffer/loadUnsafe|inlined.3 (result i64) (i64.load32_u offset=8 (i32.add - (get_local $11) + (get_local $3) (i32.shl - (get_local $9) + (get_local $10) (i32.const 2) ) ) @@ -5060,9 +5021,9 @@ (block $~lib/internal/arraybuffer/loadUnsafe|inlined.4 (result i64) (i64.load32_u offset=8 (i32.add - (get_local $11) + (get_local $3) (i32.shl - (get_local $10) + (get_local $11) (i32.const 2) ) ) @@ -5095,9 +5056,9 @@ (block $~lib/internal/arraybuffer/loadUnsafe|inlined.5 (result i64) (i64.load32_u offset=8 (i32.add - (get_local $11) + (get_local $3) (i32.shl - (get_local $7) + (get_local $8) (i32.const 2) ) ) @@ -5108,9 +5069,9 @@ (block $~lib/internal/arraybuffer/loadUnsafe|inlined.6 (result i64) (i64.load32_u offset=8 (i32.add - (get_local $11) + (get_local $3) (i32.shl - (get_local $8) + (get_local $9) (i32.const 2) ) ) @@ -5145,18 +5106,12 @@ ) ) ) - (set_local $4 + (call $~lib/internal/itoa/utoa32_lut + (get_local $0) (i32.wrap/i64 (get_local $1) ) - ) - (if - (get_local $4) - (call $~lib/internal/itoa/utoa32_lut - (get_local $0) - (get_local $4) - (get_local $2) - ) + (get_local $2) ) ) (func $~lib/internal/itoa/utoa64 (; 33 ;) (type $Ii) (param $0 i64) (result i32)