Optimize some std mem methods by replacing expensive rem operations with fast bit logic (#24)

This commit is contained in:
Max Graey 2018-02-12 19:47:32 +02:00 committed by Daniel Wirtz
parent 74f8189ac1
commit 2175e6fbdc
8 changed files with 193 additions and 172 deletions

1
NOTICE
View File

@ -4,6 +4,7 @@ The following authors have all licensed their contributions to AssemblyScript
under the licensing terms detailed in LICENSE.
* Daniel Wirtz <dcode@dcode.io>
* Max Graey <maxgraey@gmail.com>
================================================================================

View File

@ -5,13 +5,13 @@ function copy_memory(dest: usize, src: usize, n: usize): void {
var w: u32, x: u32;
// copy 1 byte each until src is aligned to 4 bytes
while (n && src % 4) {
while (n && (src & 3)) {
store<u8>(dest++, load<u8>(src++));
n--;
}
// if dst is aligned to 4 bytes as well, copy 4 bytes each
if (dest % 4 == 0) {
if ((dest & 3) == 0) {
while (n >= 16) {
store<u32>(dest , load<u32>(src ));
store<u32>(dest + 4, load<u32>(src + 4));
@ -41,7 +41,7 @@ function copy_memory(dest: usize, src: usize, n: usize): void {
// if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
// doing shifts is faster when copying enough bytes (here: 32 or more)
if (n >= 32) {
switch (dest % 4) {
switch (dest & 3) {
// known to be != 0
case 1:
w = load<u32>(src);
@ -152,8 +152,8 @@ export function move_memory(dest: usize, src: usize, n: usize): void {
return;
}
if (dest < src) {
if (src % 8 == dest % 8) {
while (dest % 8) {
if ((src & 7) == (dest & 7)) {
while (dest & 7) {
if (!n)
return;
--n;
@ -161,9 +161,9 @@ export function move_memory(dest: usize, src: usize, n: usize): void {
}
while (n >= 8) {
store<u64>(dest, load<u64>(src));
n -= 8;
n -= 8;
dest += 8;
src += 8;
src += 8;
}
}
while (n) {
@ -171,8 +171,8 @@ export function move_memory(dest: usize, src: usize, n: usize): void {
--n;
}
} else {
if (src % 8 == dest % 8) {
while ((dest + n) % 8) {
if ((src & 7) == (dest & 7)) {
while ((dest + n) & 7) {
if (!n)
return;
store<u8>(dest + --n, load<u8>(src + n));
@ -217,7 +217,7 @@ export function set_memory(dest: usize, c: u8, n: usize): void {
n -= k;
n &= -4;
var c32: u32 = -1 / 255 * c;
var c32: u32 = (-1 / 255) * c;
// fill head/tail up to 28 bytes each in preparation
store<u32>(dest, c32);

View File

@ -435,13 +435,13 @@
(local $4 i32)
(loop $continue|0
(if
(if (result i32)
(get_local $2)
(i32.rem_u
(select
(i32.and
(get_local $1)
(i32.const 4)
(i32.const 3)
)
(get_local $2)
(get_local $2)
)
(block
(set_local $0
@ -480,9 +480,9 @@
)
(if
(i32.eqz
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 4)
(i32.const 3)
)
)
(block
@ -684,9 +684,9 @@
(block $tablify|0
(br_table $case0|2 $case1|2 $case2|2 $tablify|0
(i32.sub
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 4)
(i32.const 3)
)
(i32.const 1)
)
@ -2055,21 +2055,21 @@
(block
(if
(i32.eq
(i32.rem_u
(i32.and
(get_local $1)
(i32.const 8)
(i32.const 7)
)
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
)
(block
(loop $continue|0
(if
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
(block
(if
@ -2191,24 +2191,24 @@
(block
(if
(i32.eq
(i32.rem_u
(i32.and
(get_local $1)
(i32.const 8)
(i32.const 7)
)
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
)
(block
(loop $continue|3
(if
(i32.rem_u
(i32.and
(i32.add
(get_local $0)
(get_local $2)
)
(i32.const 8)
(i32.const 7)
)
(block
(if

View File

@ -348,15 +348,16 @@
;;@ (lib)/memory.ts:220:17
(i32.mul
(i32.div_u
;;@ (lib)/memory.ts:220:18
(i32.sub
(i32.const 0)
;;@ (lib)/memory.ts:220:18
;;@ (lib)/memory.ts:220:19
(i32.const 1)
)
;;@ (lib)/memory.ts:220:22
;;@ (lib)/memory.ts:220:23
(i32.const 255)
)
;;@ (lib)/memory.ts:220:28
;;@ (lib)/memory.ts:220:30
(get_local $1)
)
)
@ -700,10 +701,11 @@
(i32.const 0)
)
;;@ (lib)/memory.ts:8:14
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:8:15
(get_local $1)
;;@ (lib)/memory.ts:8:20
(i32.const 4)
;;@ (lib)/memory.ts:8:21
(i32.const 3)
)
(get_local $2)
)
@ -758,15 +760,16 @@
(if
;;@ (lib)/memory.ts:14:6
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:14:7
(get_local $0)
;;@ (lib)/memory.ts:14:13
(i32.const 4)
;;@ (lib)/memory.ts:14:14
(i32.const 3)
)
;;@ (lib)/memory.ts:14:18
;;@ (lib)/memory.ts:14:20
(i32.const 0)
)
;;@ (lib)/memory.ts:14:21
;;@ (lib)/memory.ts:14:23
(block
(block $break|1
(loop $continue|1
@ -1065,10 +1068,10 @@
(block $case0|2
(set_local $5
;;@ (lib)/memory.ts:44:12
(i32.rem_u
(i32.and
(get_local $0)
;;@ (lib)/memory.ts:44:19
(i32.const 4)
(i32.const 3)
)
)
(br_if $case0|2
@ -3003,28 +3006,30 @@
(if
;;@ (lib)/memory.ts:155:8
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:155:9
(get_local $1)
;;@ (lib)/memory.ts:155:14
(i32.const 8)
;;@ (lib)/memory.ts:155:15
(i32.const 7)
)
;;@ (lib)/memory.ts:155:19
(i32.rem_u
;;@ (lib)/memory.ts:155:21
(i32.and
;;@ (lib)/memory.ts:155:22
(get_local $0)
;;@ (lib)/memory.ts:155:26
(i32.const 8)
;;@ (lib)/memory.ts:155:29
(i32.const 7)
)
)
;;@ (lib)/memory.ts:155:29
;;@ (lib)/memory.ts:155:33
(block
(block $break|0
(loop $continue|0
(if
;;@ (lib)/memory.ts:156:13
(i32.rem_u
(i32.and
(get_local $0)
;;@ (lib)/memory.ts:156:20
(i32.const 8)
(i32.const 7)
)
(block
(block
@ -3110,7 +3115,7 @@
(set_local $2
(i32.sub
(get_local $2)
;;@ (lib)/memory.ts:164:13
;;@ (lib)/memory.ts:164:16
(i32.const 8)
)
)
@ -3126,7 +3131,7 @@
(set_local $1
(i32.add
(get_local $1)
;;@ (lib)/memory.ts:166:15
;;@ (lib)/memory.ts:166:16
(i32.const 8)
)
)
@ -3199,25 +3204,27 @@
(if
;;@ (lib)/memory.ts:174:8
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:174:9
(get_local $1)
;;@ (lib)/memory.ts:174:14
(i32.const 8)
;;@ (lib)/memory.ts:174:15
(i32.const 7)
)
;;@ (lib)/memory.ts:174:19
(i32.rem_u
;;@ (lib)/memory.ts:174:21
(i32.and
;;@ (lib)/memory.ts:174:22
(get_local $0)
;;@ (lib)/memory.ts:174:26
(i32.const 8)
;;@ (lib)/memory.ts:174:29
(i32.const 7)
)
)
;;@ (lib)/memory.ts:174:29
;;@ (lib)/memory.ts:174:33
(block
(block $break|3
(loop $continue|3
(if
;;@ (lib)/memory.ts:175:13
(i32.rem_u
(i32.and
(i32.add
;;@ (lib)/memory.ts:175:14
(get_local $0)
@ -3225,7 +3232,7 @@
(get_local $2)
)
;;@ (lib)/memory.ts:175:26
(i32.const 8)
(i32.const 7)
)
(block
(block

View File

@ -109,13 +109,13 @@
(local $4 i32)
(loop $continue|0
(if
(if (result i32)
(get_local $2)
(i32.rem_u
(select
(i32.and
(get_local $1)
(i32.const 4)
(i32.const 3)
)
(get_local $2)
(get_local $2)
)
(block
(set_local $0
@ -154,9 +154,9 @@
)
(if
(i32.eqz
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 4)
(i32.const 3)
)
)
(block
@ -358,9 +358,9 @@
(block $tablify|0
(br_table $case0|2 $case1|2 $case2|2 $tablify|0
(i32.sub
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 4)
(i32.const 3)
)
(i32.const 1)
)
@ -1729,21 +1729,21 @@
(block
(if
(i32.eq
(i32.rem_u
(i32.and
(get_local $1)
(i32.const 8)
(i32.const 7)
)
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
)
(block
(loop $continue|0
(if
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
(block
(if
@ -1865,24 +1865,24 @@
(block
(if
(i32.eq
(i32.rem_u
(i32.and
(get_local $1)
(i32.const 8)
(i32.const 7)
)
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
)
(block
(loop $continue|3
(if
(i32.rem_u
(i32.and
(i32.add
(get_local $0)
(get_local $2)
)
(i32.const 8)
(i32.const 7)
)
(block
(if

View File

@ -182,10 +182,11 @@
(i32.const 0)
)
;;@ (lib)/memory.ts:8:14
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:8:15
(get_local $1)
;;@ (lib)/memory.ts:8:20
(i32.const 4)
;;@ (lib)/memory.ts:8:21
(i32.const 3)
)
(get_local $2)
)
@ -240,15 +241,16 @@
(if
;;@ (lib)/memory.ts:14:6
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:14:7
(get_local $0)
;;@ (lib)/memory.ts:14:13
(i32.const 4)
;;@ (lib)/memory.ts:14:14
(i32.const 3)
)
;;@ (lib)/memory.ts:14:18
;;@ (lib)/memory.ts:14:20
(i32.const 0)
)
;;@ (lib)/memory.ts:14:21
;;@ (lib)/memory.ts:14:23
(block
(block $break|1
(loop $continue|1
@ -547,10 +549,10 @@
(block $case0|2
(set_local $5
;;@ (lib)/memory.ts:44:12
(i32.rem_u
(i32.and
(get_local $0)
;;@ (lib)/memory.ts:44:19
(i32.const 4)
(i32.const 3)
)
)
(br_if $case0|2
@ -2485,28 +2487,30 @@
(if
;;@ (lib)/memory.ts:155:8
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:155:9
(get_local $1)
;;@ (lib)/memory.ts:155:14
(i32.const 8)
;;@ (lib)/memory.ts:155:15
(i32.const 7)
)
;;@ (lib)/memory.ts:155:19
(i32.rem_u
;;@ (lib)/memory.ts:155:21
(i32.and
;;@ (lib)/memory.ts:155:22
(get_local $0)
;;@ (lib)/memory.ts:155:26
(i32.const 8)
;;@ (lib)/memory.ts:155:29
(i32.const 7)
)
)
;;@ (lib)/memory.ts:155:29
;;@ (lib)/memory.ts:155:33
(block
(block $break|0
(loop $continue|0
(if
;;@ (lib)/memory.ts:156:13
(i32.rem_u
(i32.and
(get_local $0)
;;@ (lib)/memory.ts:156:20
(i32.const 8)
(i32.const 7)
)
(block
(block
@ -2592,7 +2596,7 @@
(set_local $2
(i32.sub
(get_local $2)
;;@ (lib)/memory.ts:164:13
;;@ (lib)/memory.ts:164:16
(i32.const 8)
)
)
@ -2608,7 +2612,7 @@
(set_local $1
(i32.add
(get_local $1)
;;@ (lib)/memory.ts:166:15
;;@ (lib)/memory.ts:166:16
(i32.const 8)
)
)
@ -2681,25 +2685,27 @@
(if
;;@ (lib)/memory.ts:174:8
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:174:9
(get_local $1)
;;@ (lib)/memory.ts:174:14
(i32.const 8)
;;@ (lib)/memory.ts:174:15
(i32.const 7)
)
;;@ (lib)/memory.ts:174:19
(i32.rem_u
;;@ (lib)/memory.ts:174:21
(i32.and
;;@ (lib)/memory.ts:174:22
(get_local $0)
;;@ (lib)/memory.ts:174:26
(i32.const 8)
;;@ (lib)/memory.ts:174:29
(i32.const 7)
)
)
;;@ (lib)/memory.ts:174:29
;;@ (lib)/memory.ts:174:33
(block
(block $break|3
(loop $continue|3
(if
;;@ (lib)/memory.ts:175:13
(i32.rem_u
(i32.and
(i32.add
;;@ (lib)/memory.ts:175:14
(get_local $0)
@ -2707,7 +2713,7 @@
(get_local $2)
)
;;@ (lib)/memory.ts:175:26
(i32.const 8)
(i32.const 7)
)
(block
(block
@ -3448,15 +3454,16 @@
;;@ (lib)/memory.ts:220:17
(i32.mul
(i32.div_u
;;@ (lib)/memory.ts:220:18
(i32.sub
(i32.const 0)
;;@ (lib)/memory.ts:220:18
;;@ (lib)/memory.ts:220:19
(i32.const 1)
)
;;@ (lib)/memory.ts:220:22
;;@ (lib)/memory.ts:220:23
(i32.const 255)
)
;;@ (lib)/memory.ts:220:28
;;@ (lib)/memory.ts:220:30
(get_local $1)
)
)

View File

@ -106,13 +106,13 @@
(local $4 i32)
(loop $continue|0
(if
(if (result i32)
(get_local $2)
(i32.rem_u
(select
(i32.and
(get_local $1)
(i32.const 4)
(i32.const 3)
)
(get_local $2)
(get_local $2)
)
(block
(set_local $0
@ -151,9 +151,9 @@
)
(if
(i32.eqz
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 4)
(i32.const 3)
)
)
(block
@ -355,9 +355,9 @@
(block $tablify|0
(br_table $case0|2 $case1|2 $case2|2 $tablify|0
(i32.sub
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 4)
(i32.const 3)
)
(i32.const 1)
)
@ -1726,21 +1726,21 @@
(block
(if
(i32.eq
(i32.rem_u
(i32.and
(get_local $1)
(i32.const 8)
(i32.const 7)
)
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
)
(block
(loop $continue|0
(if
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
(block
(if
@ -1862,24 +1862,24 @@
(block
(if
(i32.eq
(i32.rem_u
(i32.and
(get_local $1)
(i32.const 8)
(i32.const 7)
)
(i32.rem_u
(i32.and
(get_local $0)
(i32.const 8)
(i32.const 7)
)
)
(block
(loop $continue|3
(if
(i32.rem_u
(i32.and
(i32.add
(get_local $0)
(get_local $2)
)
(i32.const 8)
(i32.const 7)
)
(block
(if

View File

@ -179,10 +179,11 @@
(i32.const 0)
)
;;@ (lib)/memory.ts:8:14
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:8:15
(get_local $1)
;;@ (lib)/memory.ts:8:20
(i32.const 4)
;;@ (lib)/memory.ts:8:21
(i32.const 3)
)
(get_local $2)
)
@ -237,15 +238,16 @@
(if
;;@ (lib)/memory.ts:14:6
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:14:7
(get_local $0)
;;@ (lib)/memory.ts:14:13
(i32.const 4)
;;@ (lib)/memory.ts:14:14
(i32.const 3)
)
;;@ (lib)/memory.ts:14:18
;;@ (lib)/memory.ts:14:20
(i32.const 0)
)
;;@ (lib)/memory.ts:14:21
;;@ (lib)/memory.ts:14:23
(block
(block $break|1
(loop $continue|1
@ -544,10 +546,10 @@
(block $case0|2
(set_local $5
;;@ (lib)/memory.ts:44:12
(i32.rem_u
(i32.and
(get_local $0)
;;@ (lib)/memory.ts:44:19
(i32.const 4)
(i32.const 3)
)
)
(br_if $case0|2
@ -2482,28 +2484,30 @@
(if
;;@ (lib)/memory.ts:155:8
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:155:9
(get_local $1)
;;@ (lib)/memory.ts:155:14
(i32.const 8)
;;@ (lib)/memory.ts:155:15
(i32.const 7)
)
;;@ (lib)/memory.ts:155:19
(i32.rem_u
;;@ (lib)/memory.ts:155:21
(i32.and
;;@ (lib)/memory.ts:155:22
(get_local $0)
;;@ (lib)/memory.ts:155:26
(i32.const 8)
;;@ (lib)/memory.ts:155:29
(i32.const 7)
)
)
;;@ (lib)/memory.ts:155:29
;;@ (lib)/memory.ts:155:33
(block
(block $break|0
(loop $continue|0
(if
;;@ (lib)/memory.ts:156:13
(i32.rem_u
(i32.and
(get_local $0)
;;@ (lib)/memory.ts:156:20
(i32.const 8)
(i32.const 7)
)
(block
(block
@ -2589,7 +2593,7 @@
(set_local $2
(i32.sub
(get_local $2)
;;@ (lib)/memory.ts:164:13
;;@ (lib)/memory.ts:164:16
(i32.const 8)
)
)
@ -2605,7 +2609,7 @@
(set_local $1
(i32.add
(get_local $1)
;;@ (lib)/memory.ts:166:15
;;@ (lib)/memory.ts:166:16
(i32.const 8)
)
)
@ -2678,25 +2682,27 @@
(if
;;@ (lib)/memory.ts:174:8
(i32.eq
(i32.rem_u
(i32.and
;;@ (lib)/memory.ts:174:9
(get_local $1)
;;@ (lib)/memory.ts:174:14
(i32.const 8)
;;@ (lib)/memory.ts:174:15
(i32.const 7)
)
;;@ (lib)/memory.ts:174:19
(i32.rem_u
;;@ (lib)/memory.ts:174:21
(i32.and
;;@ (lib)/memory.ts:174:22
(get_local $0)
;;@ (lib)/memory.ts:174:26
(i32.const 8)
;;@ (lib)/memory.ts:174:29
(i32.const 7)
)
)
;;@ (lib)/memory.ts:174:29
;;@ (lib)/memory.ts:174:33
(block
(block $break|3
(loop $continue|3
(if
;;@ (lib)/memory.ts:175:13
(i32.rem_u
(i32.and
(i32.add
;;@ (lib)/memory.ts:175:14
(get_local $0)
@ -2704,7 +2710,7 @@
(get_local $2)
)
;;@ (lib)/memory.ts:175:26
(i32.const 8)
(i32.const 7)
)
(block
(block