diff --git a/NOTICE b/NOTICE
index b481ff34..33b55fbc 100644
--- a/NOTICE
+++ b/NOTICE
@@ -2,6 +2,7 @@ Includes parts of TypeScript:
 
   https://github.com/Microsoft/TypeScript
 
+  Copyright (c) Microsoft Corporation. All rights reserved.
   Apache License, Version 2.0
   https://www.apache.org/licenses/LICENSE-2.0
 
@@ -9,5 +10,14 @@ Includes parts of Binaryen:
 
   https://github.com/WebAssembly/binaryen
 
+  Copyright 2015 WebAssembly Community Group participants
   Apache License, Version 2.0
   https://www.apache.org/licenses/LICENSE-2.0
+
+Includes parts of musl:
+
+  http://www.musl-libc.org
+
+  Copyright © 2005-2014 Rich Felker, et al.
+  MIT License
+  https://opensource.org/licenses/MIT
diff --git a/src/program.ts b/src/program.ts
index b9eb4993..e9d618e4 100644
--- a/src/program.ts
+++ b/src/program.ts
@@ -929,8 +929,10 @@ export class Function extends Element {
   /** Contextual type arguments. */
   contextualTypeArguments: Map = new Map();
 
-  private breakMajor: i32 = 0;
-  private breakMinor: i32 = 0;
+  /** Id handed out to the next break context entered; only ever incremented. */
+  private nextBreakId: i32 = 0;
+  /** Ids of the break contexts currently entered, innermost last; null while none is entered. */
+  private breakStack: i32[] | null = null;
 
   /** Constructs a new concrete function. */
   constructor(prototype: FunctionPrototype, internalName: string, typeArguments: Type[], parameters: Parameter[], returnType: Type, instanceMethodOf: Class | null) {
@@ -974,17 +976,28 @@ export class Function extends Element {
 
   /** Enters a(nother) break context. */
   enterBreakContext(): string {
-    if (!this.breakMinor)
-      this.breakMajor++;
-    return this.breakContext = this.breakMajor.toString(10) + "." + (++this.breakMinor).toString(10);
+    // every context gets a fresh id, so nested and sibling contexts never reuse one
+    const id: i32 = this.nextBreakId++;
+    if (!this.breakStack)
+      this.breakStack = [ id ];
+    else
+      this.breakStack.push(id);
+    return this.breakContext = id.toString(10);
   }
 
   /** Leaves the current break context. */
   leaveBreakContext(): void {
-    if (this.breakMinor < 1)
-      throw new Error("unexpected unbalanced break context");
-    if (--this.breakMinor == 0)
+    assert(this.breakStack != null);
+    const length: i32 = (<i32[]>this.breakStack).length;
+    assert(length > 0);
+    (<i32[]>this.breakStack).pop();
+    if (length > 1) {
+      // the enclosing context is now on top of the stack; make it current again
+      this.breakContext = (<i32[]>this.breakStack)[length - 2].toString(10);
+    } else {
       this.breakContext = null;
+      this.breakStack = null;
+    }
   }
 }
 
diff --git a/std/impl/heap.ts b/std/impl/heap.ts
index d6e2bca4..25be5ca4 100644
--- a/std/impl/heap.ts
+++ b/std/impl/heap.ts
@@ -34,5 +34,157 @@ class Heap {
     return (current_memory() << 16) - HEAP_START;
   }
 
+  /**
+   * Copies `n` bytes from `src` to `dest` and returns `dest`.
+   * Ported from musl's memcpy; no overlap handling is performed.
+   * NOTE(review): presumably callers must pass non-overlapping ranges, as with C memcpy - confirm.
+   * Asserts that `dest` does not lie below `HEAP_START`.
+   */
+  static copy(dest: usize, src: usize, n: usize): usize {
+    assert(dest >= HEAP_START);
+
+    // the following is based on musl's implementation of memcpy
+    let dst: usize = dest;
+    let w: u32, x: u32;
+
+    // copy 1 byte each until src is aligned to 4 bytes
+    while (n != 0 && src % 4 != 0) {
+      store<u8>(dst++, load<u8>(src++));
+      n--;
+    }
+
+    // if dst is aligned to 4 bytes as well, copy 4 bytes each
+    if (dst % 4 == 0) {
+      while (n >= 16) {
+        store<u32>(dst     , load<u32>(src     ));
+        store<u32>(dst +  4, load<u32>(src +  4));
+        store<u32>(dst +  8, load<u32>(src +  8));
+        store<u32>(dst + 12, load<u32>(src + 12));
+        src += 16; dst += 16; n -= 16;
+      }
+      if (n & 8) {
+        store<u32>(dst    , load<u32>(src    ));
+        store<u32>(dst + 4, load<u32>(src + 4));
+        dst += 8; src += 8;
+      }
+      if (n & 4) {
+        store<u32>(dst, load<u32>(src));
+        dst += 4; src += 4;
+      }
+      if (n & 2) { // drop to 2 bytes each
+        store<u16>(dst, load<u16>(src));
+        dst += 2; src += 2;
+      }
+      if (n & 1) { // drop to 1 byte
+        store<u8>(dst++, load<u8>(src++));
+      }
+      return dest;
+    }
+
+    // if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
+    // doing shifts is faster when copying enough bytes (here: 32 or more)
+    // each stored word joins the rest of the previous load with the start of the next (little-endian)
+    if (n >= 32) {
+      switch (dst % 4) {
+        // known to be != 0
+        case 1:
+          w = load<u32>(src);
+          store<u8>(dst++, load<u8>(src++));
+          store<u8>(dst++, load<u8>(src++));
+          store<u8>(dst++, load<u8>(src++));
+          n -= 3;
+          while (n >= 17) {
+            x = load<u32>(src + 1);
+            store<u32>(dst, w >> 24 | x << 8);
+            w = load<u32>(src + 5);
+            store<u32>(dst + 4, x >> 24 | w << 8);
+            x = load<u32>(src + 9);
+            store<u32>(dst + 8, w >> 24 | x << 8);
+            w = load<u32>(src + 13);
+            store<u32>(dst + 12, x >> 24 | w << 8);
+            src += 16; dst += 16; n -= 16;
+          }
+          break;
+        case 2:
+          w = load<u32>(src);
+          store<u8>(dst++, load<u8>(src++));
+          store<u8>(dst++, load<u8>(src++));
+          n -= 2;
+          while (n >= 18) {
+            x = load<u32>(src + 2);
+            store<u32>(dst, w >> 16 | x << 16);
+            w = load<u32>(src + 6);
+            store<u32>(dst + 4, x >> 16 | w << 16);
+            x = load<u32>(src + 10);
+            store<u32>(dst + 8, w >> 16 | x << 16);
+            w = load<u32>(src + 14);
+            store<u32>(dst + 12, x >> 16 | w << 16);
+            src += 16; dst += 16; n -= 16;
+          }
+          break;
+        case 3:
+          w = load<u32>(src);
+          store<u8>(dst++, load<u8>(src++));
+          n -= 1;
+          while (n >= 19) {
+            x = load<u32>(src + 3);
+            store<u32>(dst, w >> 8 | x << 24);
+            w = load<u32>(src + 7);
+            store<u32>(dst + 4, x >> 8 | w << 24);
+            x = load<u32>(src + 11);
+            store<u32>(dst + 8, w >> 8 | x << 24);
+            w = load<u32>(src + 15);
+            store<u32>(dst + 12, x >> 8 | w << 24);
+            src += 16; dst += 16; n -= 16;
+          }
+          break;
+      }
+    }
+
+    // copy remaining bytes one by one
+    if (n & 16) {
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+    }
+    if (n & 8) {
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+    }
+    if (n & 4) {
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+    }
+    if (n & 2) {
+      store<u8>(dst++, load<u8>(src++));
+      store<u8>(dst++, load<u8>(src++));
+    }
+    if (n & 1) {
+      store<u8>(dst++, load<u8>(src++));
+    }
+    return dest;
+  }
+
   private constructor() {}
 }
diff --git a/tests/compiler/do.optimized.wast b/tests/compiler/do.optimized.wast
index 0962bcc9..9d4d7f5e 100644
--- a/tests/compiler/do.optimized.wast
+++ b/tests/compiler/do.optimized.wast
@@ -5,8 +5,8 @@
  (export "loopDoInDo" (func $do/loopDoInDo))
  (export "memory" (memory $0))
  (func $do/loopDo (; 0 ;) (type $iv) (param $0 i32)
-  (loop $continue|1.1
-   (br_if $continue|1.1
+  (loop $continue|0
+   (br_if $continue|0
     (tee_local $0
      (i32.sub
       (get_local $0)
@@ -17,15 +17,15 @@
   )
  )
  (func $do/loopDoInDo (; 1 ;) (type $iv) (param $0 i32)
-  (loop $continue|1.1
+  (loop $continue|0
    (set_local $0
     (i32.sub
      (get_local $0)
      (i32.const 1)
     )
    )
-   (loop $continue|1.2
-    (br_if $continue|1.2
+   (loop $continue|1
+    (br_if $continue|1
      (tee_local $0
       (i32.sub
        (get_local $0)
@@ -34,7 +34,7 @@
     )
    )
   )
-  (br_if $continue|1.1
+  (br_if $continue|0
    (get_local $0)
   )
  )
diff --git a/tests/compiler/do.wast b/tests/compiler/do.wast
index d88e76fc..2790cf61 100644
--- a/tests/compiler/do.wast
+++ b/tests/compiler/do.wast
@@ -6,23 +6,23 @@
  (export "loopDoInDo" (func $do/loopDoInDo))
  (export "memory" (memory $0))
  (func $do/loopDo (; 0 ;) (type $iv) (param $0 i32)
-  (block $break|1.1
-   (loop $continue|1.1
+  (block $break|0
+   (loop $continue|0
     (set_local $0
      (i32.sub
       (get_local $0)
       (i32.const 1)
      )
     )
-    (br_if $continue|1.1
+    (br_if $continue|0
      (get_local $0)
     )
    )
   )
  )
  (func $do/loopDoInDo (; 1 ;) (type $iv) (param $0 i32)
-  (block $break|1.1
-   (loop $continue|1.1
+  (block $break|0
+   (loop $continue|0
     (block
      (set_local $0
       (i32.sub
@@ -30,21 +30,21 @@
        (i32.const 1)
       )
      )
-     (block $break|1.2
-      (loop $continue|1.2
+     (block $break|1
+      (loop $continue|1
        (set_local $0
         (i32.sub
          (get_local $0)
          (i32.const 1)
         )
        )
-       (br_if $continue|1.2
+       (br_if $continue|1
         (get_local $0)
        )
       )
      )
     )
-    (br_if $continue|1.1
+    (br_if $continue|0
      (get_local $0)
     )
    )
diff --git a/tests/compiler/for.optimized.wast b/tests/compiler/for.optimized.wast
index 41329a3b..182f57db 100644
---
a/tests/compiler/for.optimized.wast +++ b/tests/compiler/for.optimized.wast @@ -9,7 +9,7 @@ (set_global $for/i (i32.const 0) ) - (loop $continue|1.1 + (loop $continue|0 (if (i32.lt_s (get_global $for/i) @@ -22,7 +22,7 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) @@ -36,7 +36,7 @@ (set_local $0 (i32.const 0) ) - (loop $continue|2.1 + (loop $continue|1 (if (i32.lt_s (get_local $0) @@ -49,11 +49,11 @@ (i32.const 1) ) ) - (br $continue|2.1) + (br $continue|1) ) ) ) - (loop $continue|3.1 + (loop $continue|2 (if (i32.gt_s (get_global $for/i) @@ -66,7 +66,7 @@ (i32.const 1) ) ) - (br $continue|3.1) + (br $continue|2) ) ) ) @@ -74,9 +74,9 @@ (get_global $for/i) (unreachable) ) - (block $break|4.1 - (loop $continue|4.1 - (br_if $break|4.1 + (block $break|3 + (loop $continue|3 + (br_if $break|3 (i32.eq (get_global $for/i) (i32.const 10) @@ -88,17 +88,17 @@ (i32.const 1) ) ) - (br $continue|4.1) + (br $continue|3) ) ) - (loop $continue|5.1 + (loop $continue|4 (set_global $for/i (i32.sub (get_global $for/i) (i32.const 1) ) ) - (br_if $continue|5.1 + (br_if $continue|4 (get_global $for/i) ) ) diff --git a/tests/compiler/for.wast b/tests/compiler/for.wast index 24385088..199f0f88 100644 --- a/tests/compiler/for.wast +++ b/tests/compiler/for.wast @@ -7,11 +7,11 @@ (start $start) (func $start (; 0 ;) (type $v) (local $0 i32) - (block $break|1.1 + (block $break|0 (set_global $for/i (i32.const 0) ) - (loop $continue|1.1 + (loop $continue|0 (if (i32.lt_s (get_global $for/i) @@ -25,7 +25,7 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) @@ -39,13 +39,13 @@ ) (unreachable) ) - (block $break|2.1 + (block $break|1 (block (set_local $0 (i32.const 0) ) ) - (loop $continue|2.1 + (loop $continue|1 (if (i32.lt_s (get_local $0) @@ -59,14 +59,14 @@ (i32.const 1) ) ) - (br $continue|2.1) + (br $continue|1) ) ) ) ) - (block $break|3.1 + (block $break|2 (nop) - (loop $continue|3.1 + (loop $continue|2 (if (i32.gt_s (get_global $for/i) @@ -80,7 +80,7 @@ 
(i32.const 1) ) ) - (br $continue|3.1) + (br $continue|2) ) ) ) @@ -94,9 +94,9 @@ ) (unreachable) ) - (block $break|4.1 + (block $break|3 (nop) - (loop $continue|4.1 + (loop $continue|3 (if (i32.const 1) (block @@ -105,7 +105,7 @@ (get_global $for/i) (i32.const 10) ) - (br $break|4.1) + (br $break|3) ) (set_global $for/i (i32.add @@ -113,14 +113,14 @@ (i32.const 1) ) ) - (br $continue|4.1) + (br $continue|3) ) ) ) ) - (block $break|5.1 + (block $break|4 (nop) - (loop $continue|5.1 + (loop $continue|4 (if (i32.const 1) (block @@ -137,10 +137,10 @@ ) (i32.const 0) ) - (br $break|5.1) + (br $break|4) ) (nop) - (br $continue|5.1) + (br $continue|4) ) ) ) diff --git a/tests/compiler/game-of-life.optimized.wast b/tests/compiler/game-of-life.optimized.wast index 928b734e..3078cae6 100644 --- a/tests/compiler/game-of-life.optimized.wast +++ b/tests/compiler/game-of-life.optimized.wast @@ -46,7 +46,7 @@ (set_local $0 (i32.const 0) ) - (loop $continue|1.1 + (loop $continue|0 (if (i32.lt_s (get_local $0) @@ -79,7 +79,7 @@ (set_local $1 (i32.const 0) ) - (loop $continue|1.2 + (loop $continue|1 (if (i32.lt_s (get_local $1) @@ -259,7 +259,7 @@ (i32.const 1) ) ) - (br $continue|1.2) + (br $continue|1) ) ) ) @@ -269,7 +269,7 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) diff --git a/tests/compiler/game-of-life.wast b/tests/compiler/game-of-life.wast index 5dfcebc0..c378fbdc 100644 --- a/tests/compiler/game-of-life.wast +++ b/tests/compiler/game-of-life.wast @@ -57,11 +57,11 @@ (i32.const 0) ) ) - (block $break|1.1 + (block $break|0 (set_local $0 (i32.const 0) ) - (loop $continue|1.1 + (loop $continue|0 (if (i32.lt_s (get_local $0) @@ -95,11 +95,11 @@ ) ) ) - (block $break|1.2 + (block $break|1 (set_local $3 (i32.const 0) ) - (loop $continue|1.2 + (loop $continue|1 (if (i32.lt_s (get_local $3) @@ -289,7 +289,7 @@ (i32.const 1) ) ) - (br $continue|1.2) + (br $continue|1) ) ) ) @@ -301,7 +301,7 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) 
diff --git a/tests/compiler/memcpy.optimized.wast b/tests/compiler/memcpy.optimized.wast new file mode 100644 index 00000000..f6a78364 --- /dev/null +++ b/tests/compiler/memcpy.optimized.wast @@ -0,0 +1,1727 @@ +(module + (type $iiii (func (param i32 i32 i32) (result i32))) + (type $v (func)) + (global $memcpy/dst (mut i32) (i32.const 0)) + (memory $0 1) + (export "memcpy" (func $memcpy/memcpy)) + (export "memory" (memory $0)) + (start $start) + (func $memcpy/memcpy (; 0 ;) (type $iiii) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (set_local $4 + (get_local $0) + ) + (loop $continue|0 + (if + (if (result i32) + (tee_local $3 + (i32.ne + (get_local $2) + (i32.const 0) + ) + ) + (i32.rem_s + (get_local $1) + (i32.const 4) + ) + (get_local $3) + ) + (block + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 1) + ) + ) + (br $continue|0) + ) + ) + ) + (if + (i32.eqz + (i32.rem_s + (get_local $4) + (i32.const 4) + ) + ) + (block + (loop $continue|1 + (if + (i32.ge_s + (get_local $2) + (i32.const 16) + ) + (block + (i32.store + (get_local $4) + (i32.load + (get_local $1) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 4) + ) + (i32.load + (i32.add + (get_local $1) + (i32.const 4) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 8) + ) + (i32.load + (i32.add + (get_local $1) + (i32.const 8) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 12) + ) + (i32.load + (i32.add + (get_local $1) + (i32.const 12) + ) + ) + ) + (set_local $1 + (i32.add + (get_local $1) + (i32.const 16) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 16) + ) + ) + (set_local $2 + (i32.sub + 
(get_local $2) + (i32.const 16) + ) + ) + (br $continue|1) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 8) + ) + (block + (i32.store + (get_local $4) + (i32.load + (get_local $1) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 4) + ) + (i32.load + (i32.add + (get_local $1) + (i32.const 4) + ) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 8) + ) + ) + (set_local $1 + (i32.add + (get_local $1) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 4) + ) + (block + (i32.store + (get_local $4) + (i32.load + (get_local $1) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 4) + ) + ) + (set_local $1 + (i32.add + (get_local $1) + (i32.const 4) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 2) + ) + (block + (i32.store16 + (get_local $4) + (i32.load16_u + (get_local $1) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 2) + ) + ) + (set_local $1 + (i32.add + (get_local $1) + (i32.const 2) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 1) + ) + (i32.store8 + (get_local $4) + (i32.load8_u + (get_local $1) + ) + ) + ) + (return + (get_local $0) + ) + ) + ) + (if + (i32.ge_s + (get_local $2) + (i32.const 32) + ) + (block $break|2 + (block $case2|2 + (block $case1|2 + (block $case0|2 + (block $tablify|0 + (br_table $case0|2 $case1|2 $case2|2 $tablify|0 + (i32.sub + (i32.rem_s + (get_local $4) + (i32.const 4) + ) + (i32.const 1) + ) + ) + ) + (br $break|2) + ) + (set_local $5 + (i32.load + (get_local $1) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + 
(tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 3) + ) + ) + (loop $continue|3 + (if + (i32.ge_s + (get_local $2) + (i32.const 17) + ) + (block + (i32.store + (get_local $4) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 24) + ) + (i32.shl + (tee_local $3 + (i32.load + (i32.add + (get_local $1) + (i32.const 1) + ) + ) + ) + (i32.const 8) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 4) + ) + (i32.or + (i32.shr_u + (get_local $3) + (i32.const 24) + ) + (i32.shl + (tee_local $5 + (i32.load + (i32.add + (get_local $1) + (i32.const 5) + ) + ) + ) + (i32.const 8) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 8) + ) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 24) + ) + (i32.shl + (tee_local $3 + (i32.load + (i32.add + (get_local $1) + (i32.const 9) + ) + ) + ) + (i32.const 8) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 12) + ) + (i32.or + (i32.shr_u + (get_local $3) + (i32.const 24) + ) + (i32.shl + (tee_local $5 + (i32.load + (i32.add + (get_local $1) + (i32.const 13) + ) + ) + ) + (i32.const 8) + ) + ) + ) + (set_local $1 + (i32.add + (get_local $1) + (i32.const 16) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 16) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 16) + ) + ) + (br $continue|3) + ) + ) + ) + (br $break|2) + ) + (set_local $5 + (i32.load + (get_local $1) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + 
(i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 2) + ) + ) + (loop $continue|4 + (if + (i32.ge_s + (get_local $2) + (i32.const 18) + ) + (block + (i32.store + (get_local $4) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 16) + ) + (i32.shl + (tee_local $3 + (i32.load + (i32.add + (get_local $1) + (i32.const 2) + ) + ) + ) + (i32.const 16) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 4) + ) + (i32.or + (i32.shr_u + (get_local $3) + (i32.const 16) + ) + (i32.shl + (tee_local $5 + (i32.load + (i32.add + (get_local $1) + (i32.const 6) + ) + ) + ) + (i32.const 16) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 8) + ) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 16) + ) + (i32.shl + (tee_local $3 + (i32.load + (i32.add + (get_local $1) + (i32.const 10) + ) + ) + ) + (i32.const 16) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 12) + ) + (i32.or + (i32.shr_u + (get_local $3) + (i32.const 16) + ) + (i32.shl + (tee_local $5 + (i32.load + (i32.add + (get_local $1) + (i32.const 14) + ) + ) + ) + (i32.const 16) + ) + ) + ) + (set_local $1 + (i32.add + (get_local $1) + (i32.const 16) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 16) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 16) + ) + ) + (br $continue|4) + ) + ) + ) + (br $break|2) + ) + (set_local $5 + (i32.load + (get_local $1) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + 
(i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 1) + ) + ) + (loop $continue|5 + (if + (i32.ge_s + (get_local $2) + (i32.const 19) + ) + (block + (i32.store + (get_local $4) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 8) + ) + (i32.shl + (tee_local $3 + (i32.load + (i32.add + (get_local $1) + (i32.const 3) + ) + ) + ) + (i32.const 24) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 4) + ) + (i32.or + (i32.shr_u + (get_local $3) + (i32.const 8) + ) + (i32.shl + (tee_local $5 + (i32.load + (i32.add + (get_local $1) + (i32.const 7) + ) + ) + ) + (i32.const 24) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 8) + ) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 8) + ) + (i32.shl + (tee_local $3 + (i32.load + (i32.add + (get_local $1) + (i32.const 11) + ) + ) + ) + (i32.const 24) + ) + ) + ) + (i32.store + (i32.add + (get_local $4) + (i32.const 12) + ) + (i32.or + (i32.shr_u + (get_local $3) + (i32.const 8) + ) + (i32.shl + (tee_local $5 + (i32.load + (i32.add + (get_local $1) + (i32.const 15) + ) + ) + ) + (i32.const 24) + ) + ) + ) + (set_local $1 + (i32.add + (get_local $1) + (i32.const 16) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 16) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 16) + ) + ) + (br $continue|5) + ) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 16) + ) + (block + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) 
+ ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) 
+ ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 8) + ) + (block + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 
+ (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + 
(get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 4) + ) + (block + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 2) + ) + (block + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + (set_local $4 + (i32.add + (tee_local $3 + (get_local $4) + ) + (i32.const 1) + ) + ) + (i32.store8 + (get_local $3) + (block (result i32) + (set_local $1 + (i32.add + (tee_local $3 + (get_local $1) + ) + (i32.const 1) + ) + ) + (i32.load8_u + (get_local $3) + ) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 1) + ) + (i32.store8 + (get_local $4) + (i32.load8_u + (get_local $1) + ) + ) 
+ ) + (get_local $0) + ) + (func $start (; 1 ;) (type $v) + (i64.store + (i32.const 8) + (i64.const 1229782938247303441) + ) + (i64.store + (i32.const 16) + (i64.const 2459565876494606882) + ) + (i64.store + (i32.const 24) + (i64.const 3689348814741910323) + ) + (i64.store + (i32.const 32) + (i64.const 4919131752989213764) + ) + (set_global $memcpy/dst + (call $memcpy/memcpy + (i32.const 9) + (i32.const 24) + (i32.const 4) + ) + ) + (if + (i32.ne + (get_global $memcpy/dst) + (i32.const 9) + ) + (unreachable) + ) + (if + (i64.ne + (i64.load + (i32.const 8) + ) + (i64.const 1229783084848853777) + ) + (unreachable) + ) + (set_global $memcpy/dst + (call $memcpy/memcpy + (i32.const 8) + (i32.const 8) + (i32.const 32) + ) + ) + (if + (i32.ne + (get_global $memcpy/dst) + (i32.const 8) + ) + (unreachable) + ) + (if + (i64.ne + (i64.load + (i32.const 8) + ) + (i64.const 1229783084848853777) + ) + (unreachable) + ) + (if + (i64.ne + (i64.load + (i32.const 16) + ) + (i64.const 2459565876494606882) + ) + (unreachable) + ) + (if + (i64.ne + (i64.load + (i32.const 24) + ) + (i64.const 3689348814741910323) + ) + (unreachable) + ) + (if + (i64.ne + (i64.load + (i32.const 32) + ) + (i64.const 4919131752989213764) + ) + (unreachable) + ) + (set_global $memcpy/dst + (call $memcpy/memcpy + (i32.const 13) + (i32.const 36) + (i32.const 3) + ) + ) + (if + (i64.ne + (i64.load + (i32.const 8) + ) + (i64.const 4919131679688438545) + ) + (unreachable) + ) + (set_global $memcpy/dst + (call $memcpy/memcpy + (i32.const 16) + (i32.const 24) + (i32.const 15) + ) + ) + (if + (i64.ne + (i64.load + (i32.const 8) + ) + (i64.const 4919131679688438545) + ) + (unreachable) + ) + (if + (i64.ne + (i64.load + (i32.const 16) + ) + (i64.const 3689348814741910323) + ) + (unreachable) + ) + (if + (i64.ne + (i64.load + (i32.const 24) + ) + (i64.const 3694152654344438852) + ) + (unreachable) + ) + (if + (i64.ne + (i64.load + (i32.const 32) + ) + (i64.const 4919131752989213764) + ) + (unreachable) + ) + ) +) diff 
--git a/tests/compiler/memcpy.ts b/tests/compiler/memcpy.ts
new file mode 100644
index 00000000..e7638568
--- /dev/null
+++ b/tests/compiler/memcpy.ts
@@ -0,0 +1,174 @@
+/**
+ * Copies `n` bytes from `src` to `dest` and returns `dest`.
+ * Ported from musl's memcpy; performs no overlap handling.
+ */
+export function memcpy(dest: usize, src: usize, n: usize): usize {
+  // the following is based on musl's implementation of memcpy
+  let d: usize = dest, s: usize = src;
+  let w: u32, x: u32;
+
+  // copy 1 byte each until src is aligned to 4 bytes
+  while (n != 0 && s % 4 != 0) {
+    store<u8>(d++, load<u8>(s++));
+    n--;
+  }
+
+  // if dst is aligned to 4 bytes as well, copy 4 bytes each
+  if (d % 4 == 0) {
+    while (n >= 16) {
+      store<u32>(d     , load<u32>(s     ));
+      store<u32>(d +  4, load<u32>(s +  4));
+      store<u32>(d +  8, load<u32>(s +  8));
+      store<u32>(d + 12, load<u32>(s + 12));
+      s += 16; d += 16; n -= 16;
+    }
+    if (n & 8) {
+      store<u32>(d    , load<u32>(s    ));
+      store<u32>(d + 4, load<u32>(s + 4));
+      d += 8; s += 8;
+    }
+    if (n & 4) {
+      store<u32>(d, load<u32>(s));
+      d += 4; s += 4;
+    }
+    if (n & 2) { // drop to 2 bytes each
+      store<u16>(d, load<u16>(s));
+      d += 2; s += 2;
+    }
+    if (n & 1) { // drop to 1 byte
+      store<u8>(d++, load<u8>(s++));
+    }
+    return dest;
+  }
+
+  // if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
+  // doing shifts is faster when copying enough bytes (here: 32 or more)
+  // each stored word joins the rest of the previous load with the start of the next (little-endian)
+  if (n >= 32) {
+    switch (d % 4) {
+      // known to be != 0
+      case 1:
+        w = load<u32>(s);
+        store<u8>(d++, load<u8>(s++));
+        store<u8>(d++, load<u8>(s++));
+        store<u8>(d++, load<u8>(s++));
+        n -= 3;
+        while (n >= 17) {
+          x = load<u32>(s + 1);
+          store<u32>(d, w >> 24 | x << 8);
+          w = load<u32>(s + 5);
+          store<u32>(d + 4, x >> 24 | w << 8);
+          x = load<u32>(s + 9);
+          store<u32>(d + 8, w >> 24 | x << 8);
+          w = load<u32>(s + 13);
+          store<u32>(d + 12, x >> 24 | w << 8);
+          s += 16; d += 16; n -= 16;
+        }
+        break;
+      case 2:
+        w = load<u32>(s);
+        store<u8>(d++, load<u8>(s++));
+        store<u8>(d++, load<u8>(s++));
+        n -= 2;
+        while (n >= 18) {
+          x = load<u32>(s + 2);
+          store<u32>(d, w >> 16 | x << 16);
+          w = load<u32>(s + 6);
+          store<u32>(d + 4, x >> 16 | w << 16);
+          x = load<u32>(s + 10);
+          store<u32>(d + 8, w >> 16 | x << 16);
+          w = load<u32>(s + 14);
+          store<u32>(d + 12, x >> 16 | w << 16);
+          s += 16; d += 16; n -= 16;
+        }
+        break;
+      case 3:
+        w = load<u32>(s);
+        store<u8>(d++, load<u8>(s++));
+        n -= 1;
+        while (n >= 19) {
+          x = load<u32>(s + 3);
+          store<u32>(d, w >> 8 | x << 24);
+          w = load<u32>(s + 7);
+          store<u32>(d + 4, x >> 8 | w << 24);
+          x = load<u32>(s + 11);
+          store<u32>(d + 8, w >> 8 | x << 24);
+          w = load<u32>(s + 15);
+          store<u32>(d + 12, x >> 8 | w << 24);
+          s += 16; d += 16; n -= 16;
+        }
+        break;
+    }
+  }
+
+  // copy remaining bytes one by one
+  if (n & 16) {
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+  }
+  if (n & 8) {
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+  }
+  if (n & 4) {
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+  }
+  if (n & 2) {
+    store<u8>(d++, load<u8>(s++));
+    store<u8>(d++, load<u8>(s++));
+  }
+  if (n & 1) {
+    store<u8>(d++, load<u8>(s++));
+  }
+  return dest;
+}
+
+const base: usize = 8;
+store<u64>(base     , 0x1111111111111111);
+store<u64>(base + 8 , 0x2222222222222222);
+store<u64>(base + 16, 0x3333333333333333);
+store<u64>(base + 24, 0x4444444444444444);
+
+let dst: usize;
+dst = memcpy(base + 1, base + 16, 4);
+assert(dst == base + 1);
+assert(load<u64>(base) == 0x1111113333333311);
+
+dst = memcpy(base, base, 32);
+assert(dst == base);
+assert(load<u64>(base) == 0x1111113333333311);
+assert(load<u64>(base + 8) == 0x2222222222222222);
+assert(load<u64>(base + 16) == 0x3333333333333333);
+assert(load<u64>(base + 24) == 0x4444444444444444);
+
+dst = memcpy(base + 5, base + 28, 3);
+assert(load<u64>(base) == 0x4444443333333311);
+
+dst = memcpy(base + 8, base + 16, 15);
+assert(load<u64>(base) == 0x4444443333333311);
+assert(load<u64>(base + 8) == 0x3333333333333333);
+assert(load<u64>(base + 16) == 0x3344444444444444);
+assert(load<u64>(base + 24) == 0x4444444444444444);
diff --git a/tests/compiler/memcpy.wast b/tests/compiler/memcpy.wast
new file mode 100644
index 00000000..d99abf52
--- /dev/null
+++ b/tests/compiler/memcpy.wast
@@ -0,0 +1,2175 @@
+(module
+ (type $iiii (func (param i32 i32 i32) (result i32)))
+ (type $v (func))
+ (global $memcpy/base i32 (i32.const 8))
+ (global $memcpy/dst (mut i32) (i32.const 0))
+ (global $HEAP_START i32 (i32.const 4))
+ (memory $0 1)
+ (export "memcpy" (func $memcpy/memcpy))
+ (export "memory" (memory $0))
+ (start $start)
+ (func $memcpy/memcpy (; 0 ;) (type $iiii) (param $0 i32) (param $1 i32) (param $2 i32) (result i32)
+  (local $3 i32)
+  (local $4 i32)
+  (local $5 i32)
+  (local $6 i32)
+  (local $7 i32)
+  (local $8 i32)
+  (local $9 i32)
+  (local $10 i32)
+  (local $11 i32)
+  (local $12 i32)
+  (local $13 i32)
+  (local $14 i32)
+  (local $15 i32)
+  (local $16 i32)
+  (local $17 i32)
+  (local $18 i32)
+  (local $19 i32)
+  (local $20 i32)
+  (local $21 i32)
+  (local $22 i32)
+  (local $23 i32)
+  (local $24 i32)
+  (local $25 i32)
+  (local $26 i32)
+  (local $27 i32)
+  (local $28 i32)
+  (local $29 i32)
+  (local $30 i32)
+  (local $31 i32)
+  (local $32 i32)
+  (local $33 i32)
+  (local $34 i32)
+  (local $35 i32)
+  (local $36 i32)
+  (local $37 i32)
+  (local $38 i32)
+  (local $39 i32)
+  (local $40 i32)
+  (local $41 i32)
+  (local $42 i32)
+  (local $43 i32)
+  (local $44 i32)
+  (local $45 i32)
+  (local $46 i32)
+  (local $47 i32)
+  (local $48 i32)
+  (local $49 i32)
+  (local $50 i32)
+  (local $51 i32)
+  (local $52 i32)
+  (local $53 i32)
+  (local $54 i32)
+  (local $55 i32)
+  (local $56 i32)
+  (local $57 i32)
+  (local $58 i32)
+  (local $59 i32)
+  (local $60 i32)
+  (local $61 i32)
+  (local $62 i32)
+  (local $63 i32)
+  (local $64 i32)
+  (local $65 i32)
+  (local $66 i32)
+  (local $67 i32)
+  (local $68 i32)
+  (local $69 i32)
+  (local $70 i32)
+  (local $71 i32)
+  (local $72 i32)
+ (local $73 i32) + (local $74 i32) + (local $75 i32) + (local $76 i32) + (local $77 i32) + (local $78 i32) + (local $79 i32) + (local $80 i32) + (local $81 i32) + (local $82 i32) + (local $83 i32) + (local $84 i32) + (local $85 i32) + (local $86 i32) + (local $87 i32) + (block + (set_local $3 + (get_local $0) + ) + (set_local $4 + (get_local $1) + ) + ) + (nop) + (block $break|0 + (loop $continue|0 + (if + (if (result i32) + (tee_local $7 + (i32.ne + (get_local $2) + (i32.const 0) + ) + ) + (i32.ne + (i32.rem_s + (get_local $4) + (i32.const 4) + ) + (i32.const 0) + ) + (get_local $7) + ) + (block + (block + (i32.store8 + (block (result i32) + (set_local $8 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $8) + (i32.const 1) + ) + ) + (get_local $8) + ) + (i32.load8_u + (block (result i32) + (set_local $9 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $9) + (i32.const 1) + ) + ) + (get_local $9) + ) + ) + ) + (drop + (block (result i32) + (set_local $10 + (get_local $2) + ) + (set_local $2 + (i32.sub + (get_local $10) + (i32.const 1) + ) + ) + (get_local $10) + ) + ) + ) + (br $continue|0) + ) + ) + ) + ) + (if + (i32.eq + (i32.rem_s + (get_local $3) + (i32.const 4) + ) + (i32.const 0) + ) + (block + (block $break|1 + (loop $continue|1 + (if + (i32.ge_s + (get_local $2) + (i32.const 16) + ) + (block + (block + (i32.store + (get_local $3) + (i32.load + (get_local $4) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 4) + ) + (i32.load + (i32.add + (get_local $4) + (i32.const 4) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 8) + ) + (i32.load + (i32.add + (get_local $4) + (i32.const 8) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 12) + ) + (i32.load + (i32.add + (get_local $4) + (i32.const 12) + ) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 16) + ) + ) + (set_local $3 + (i32.add + (get_local $3) + (i32.const 16) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + 
(i32.const 16) + ) + ) + ) + (br $continue|1) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 8) + ) + (block + (i32.store + (get_local $3) + (i32.load + (get_local $4) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 4) + ) + (i32.load + (i32.add + (get_local $4) + (i32.const 4) + ) + ) + ) + (set_local $3 + (i32.add + (get_local $3) + (i32.const 8) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 4) + ) + (block + (i32.store + (get_local $3) + (i32.load + (get_local $4) + ) + ) + (set_local $3 + (i32.add + (get_local $3) + (i32.const 4) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 4) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 2) + ) + (block + (i32.store16 + (get_local $3) + (i32.load16_u + (get_local $4) + ) + ) + (set_local $3 + (i32.add + (get_local $3) + (i32.const 2) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 2) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 1) + ) + (i32.store8 + (block (result i32) + (set_local $11 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $11) + (i32.const 1) + ) + ) + (get_local $11) + ) + (i32.load8_u + (block (result i32) + (set_local $12 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $12) + (i32.const 1) + ) + ) + (get_local $12) + ) + ) + ) + ) + (return + (get_local $0) + ) + ) + ) + (if + (i32.ge_s + (get_local $2) + (i32.const 32) + ) + (block $break|2 + (block $case2|2 + (block $case1|2 + (block $case0|2 + (set_local $13 + (i32.rem_s + (get_local $3) + (i32.const 4) + ) + ) + (br_if $case0|2 + (i32.eq + (get_local $13) + (i32.const 1) + ) + ) + (br_if $case1|2 + (i32.eq + (get_local $13) + (i32.const 2) + ) + ) + (br_if $case2|2 + (i32.eq + (get_local $13) + (i32.const 3) + ) + ) + (br $break|2) + ) + (set_local $5 + (i32.load + (get_local $4) + ) + ) + (i32.store8 + (block (result i32) + (set_local $14 + 
(get_local $3) + ) + (set_local $3 + (i32.add + (get_local $14) + (i32.const 1) + ) + ) + (get_local $14) + ) + (i32.load8_u + (block (result i32) + (set_local $15 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $15) + (i32.const 1) + ) + ) + (get_local $15) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $16 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $16) + (i32.const 1) + ) + ) + (get_local $16) + ) + (i32.load8_u + (block (result i32) + (set_local $17 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $17) + (i32.const 1) + ) + ) + (get_local $17) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $18 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $18) + (i32.const 1) + ) + ) + (get_local $18) + ) + (i32.load8_u + (block (result i32) + (set_local $19 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $19) + (i32.const 1) + ) + ) + (get_local $19) + ) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 3) + ) + ) + (block $break|3 + (loop $continue|3 + (if + (i32.ge_s + (get_local $2) + (i32.const 17) + ) + (block + (block + (set_local $6 + (i32.load + (i32.add + (get_local $4) + (i32.const 1) + ) + ) + ) + (i32.store + (get_local $3) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 24) + ) + (i32.shl + (get_local $6) + (i32.const 8) + ) + ) + ) + (set_local $5 + (i32.load + (i32.add + (get_local $4) + (i32.const 5) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 4) + ) + (i32.or + (i32.shr_u + (get_local $6) + (i32.const 24) + ) + (i32.shl + (get_local $5) + (i32.const 8) + ) + ) + ) + (set_local $6 + (i32.load + (i32.add + (get_local $4) + (i32.const 9) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 8) + ) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 24) + ) + (i32.shl + (get_local $6) + (i32.const 8) + ) + ) + ) + (set_local $5 + (i32.load + (i32.add + (get_local $4) + (i32.const 13) + ) + ) + ) + (i32.store + 
(i32.add + (get_local $3) + (i32.const 12) + ) + (i32.or + (i32.shr_u + (get_local $6) + (i32.const 24) + ) + (i32.shl + (get_local $5) + (i32.const 8) + ) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 16) + ) + ) + (set_local $3 + (i32.add + (get_local $3) + (i32.const 16) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 16) + ) + ) + ) + (br $continue|3) + ) + ) + ) + ) + (br $break|2) + ) + (set_local $5 + (i32.load + (get_local $4) + ) + ) + (i32.store8 + (block (result i32) + (set_local $20 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $20) + (i32.const 1) + ) + ) + (get_local $20) + ) + (i32.load8_u + (block (result i32) + (set_local $21 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $21) + (i32.const 1) + ) + ) + (get_local $21) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $22 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $22) + (i32.const 1) + ) + ) + (get_local $22) + ) + (i32.load8_u + (block (result i32) + (set_local $23 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $23) + (i32.const 1) + ) + ) + (get_local $23) + ) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 2) + ) + ) + (block $break|4 + (loop $continue|4 + (if + (i32.ge_s + (get_local $2) + (i32.const 18) + ) + (block + (block + (set_local $6 + (i32.load + (i32.add + (get_local $4) + (i32.const 2) + ) + ) + ) + (i32.store + (get_local $3) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 16) + ) + (i32.shl + (get_local $6) + (i32.const 16) + ) + ) + ) + (set_local $5 + (i32.load + (i32.add + (get_local $4) + (i32.const 6) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 4) + ) + (i32.or + (i32.shr_u + (get_local $6) + (i32.const 16) + ) + (i32.shl + (get_local $5) + (i32.const 16) + ) + ) + ) + (set_local $6 + (i32.load + (i32.add + (get_local $4) + (i32.const 10) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 8) + ) + (i32.or + 
(i32.shr_u + (get_local $5) + (i32.const 16) + ) + (i32.shl + (get_local $6) + (i32.const 16) + ) + ) + ) + (set_local $5 + (i32.load + (i32.add + (get_local $4) + (i32.const 14) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 12) + ) + (i32.or + (i32.shr_u + (get_local $6) + (i32.const 16) + ) + (i32.shl + (get_local $5) + (i32.const 16) + ) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 16) + ) + ) + (set_local $3 + (i32.add + (get_local $3) + (i32.const 16) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 16) + ) + ) + ) + (br $continue|4) + ) + ) + ) + ) + (br $break|2) + ) + (set_local $5 + (i32.load + (get_local $4) + ) + ) + (i32.store8 + (block (result i32) + (set_local $24 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $24) + (i32.const 1) + ) + ) + (get_local $24) + ) + (i32.load8_u + (block (result i32) + (set_local $25 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $25) + (i32.const 1) + ) + ) + (get_local $25) + ) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 1) + ) + ) + (block $break|5 + (loop $continue|5 + (if + (i32.ge_s + (get_local $2) + (i32.const 19) + ) + (block + (block + (set_local $6 + (i32.load + (i32.add + (get_local $4) + (i32.const 3) + ) + ) + ) + (i32.store + (get_local $3) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 8) + ) + (i32.shl + (get_local $6) + (i32.const 24) + ) + ) + ) + (set_local $5 + (i32.load + (i32.add + (get_local $4) + (i32.const 7) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 4) + ) + (i32.or + (i32.shr_u + (get_local $6) + (i32.const 8) + ) + (i32.shl + (get_local $5) + (i32.const 24) + ) + ) + ) + (set_local $6 + (i32.load + (i32.add + (get_local $4) + (i32.const 11) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 8) + ) + (i32.or + (i32.shr_u + (get_local $5) + (i32.const 8) + ) + (i32.shl + (get_local $6) + (i32.const 24) + ) + ) + ) + (set_local $5 + (i32.load + 
(i32.add + (get_local $4) + (i32.const 15) + ) + ) + ) + (i32.store + (i32.add + (get_local $3) + (i32.const 12) + ) + (i32.or + (i32.shr_u + (get_local $6) + (i32.const 8) + ) + (i32.shl + (get_local $5) + (i32.const 24) + ) + ) + ) + (set_local $4 + (i32.add + (get_local $4) + (i32.const 16) + ) + ) + (set_local $3 + (i32.add + (get_local $3) + (i32.const 16) + ) + ) + (set_local $2 + (i32.sub + (get_local $2) + (i32.const 16) + ) + ) + ) + (br $continue|5) + ) + ) + ) + ) + (br $break|2) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 16) + ) + (block + (i32.store8 + (block (result i32) + (set_local $26 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $26) + (i32.const 1) + ) + ) + (get_local $26) + ) + (i32.load8_u + (block (result i32) + (set_local $27 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $27) + (i32.const 1) + ) + ) + (get_local $27) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $28 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $28) + (i32.const 1) + ) + ) + (get_local $28) + ) + (i32.load8_u + (block (result i32) + (set_local $29 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $29) + (i32.const 1) + ) + ) + (get_local $29) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $30 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $30) + (i32.const 1) + ) + ) + (get_local $30) + ) + (i32.load8_u + (block (result i32) + (set_local $31 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $31) + (i32.const 1) + ) + ) + (get_local $31) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $32 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $32) + (i32.const 1) + ) + ) + (get_local $32) + ) + (i32.load8_u + (block (result i32) + (set_local $33 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $33) + (i32.const 1) + ) + ) + (get_local $33) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $34 + (get_local $3) + ) + 
(set_local $3 + (i32.add + (get_local $34) + (i32.const 1) + ) + ) + (get_local $34) + ) + (i32.load8_u + (block (result i32) + (set_local $35 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $35) + (i32.const 1) + ) + ) + (get_local $35) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $36 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $36) + (i32.const 1) + ) + ) + (get_local $36) + ) + (i32.load8_u + (block (result i32) + (set_local $37 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $37) + (i32.const 1) + ) + ) + (get_local $37) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $38 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $38) + (i32.const 1) + ) + ) + (get_local $38) + ) + (i32.load8_u + (block (result i32) + (set_local $39 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $39) + (i32.const 1) + ) + ) + (get_local $39) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $40 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $40) + (i32.const 1) + ) + ) + (get_local $40) + ) + (i32.load8_u + (block (result i32) + (set_local $41 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $41) + (i32.const 1) + ) + ) + (get_local $41) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $42 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $42) + (i32.const 1) + ) + ) + (get_local $42) + ) + (i32.load8_u + (block (result i32) + (set_local $43 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $43) + (i32.const 1) + ) + ) + (get_local $43) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $44 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $44) + (i32.const 1) + ) + ) + (get_local $44) + ) + (i32.load8_u + (block (result i32) + (set_local $45 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $45) + (i32.const 1) + ) + ) + (get_local $45) + ) + ) + ) + (i32.store8 + (block (result i32) + 
(set_local $46 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $46) + (i32.const 1) + ) + ) + (get_local $46) + ) + (i32.load8_u + (block (result i32) + (set_local $47 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $47) + (i32.const 1) + ) + ) + (get_local $47) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $48 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $48) + (i32.const 1) + ) + ) + (get_local $48) + ) + (i32.load8_u + (block (result i32) + (set_local $49 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $49) + (i32.const 1) + ) + ) + (get_local $49) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $50 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $50) + (i32.const 1) + ) + ) + (get_local $50) + ) + (i32.load8_u + (block (result i32) + (set_local $51 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $51) + (i32.const 1) + ) + ) + (get_local $51) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $52 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $52) + (i32.const 1) + ) + ) + (get_local $52) + ) + (i32.load8_u + (block (result i32) + (set_local $53 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $53) + (i32.const 1) + ) + ) + (get_local $53) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $54 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $54) + (i32.const 1) + ) + ) + (get_local $54) + ) + (i32.load8_u + (block (result i32) + (set_local $55 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $55) + (i32.const 1) + ) + ) + (get_local $55) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $56 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $56) + (i32.const 1) + ) + ) + (get_local $56) + ) + (i32.load8_u + (block (result i32) + (set_local $57 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $57) + (i32.const 1) + ) + ) + (get_local $57) + ) + ) + ) + ) 
+ ) + (if + (i32.and + (get_local $2) + (i32.const 8) + ) + (block + (i32.store8 + (block (result i32) + (set_local $58 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $58) + (i32.const 1) + ) + ) + (get_local $58) + ) + (i32.load8_u + (block (result i32) + (set_local $59 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $59) + (i32.const 1) + ) + ) + (get_local $59) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $60 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $60) + (i32.const 1) + ) + ) + (get_local $60) + ) + (i32.load8_u + (block (result i32) + (set_local $61 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $61) + (i32.const 1) + ) + ) + (get_local $61) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $62 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $62) + (i32.const 1) + ) + ) + (get_local $62) + ) + (i32.load8_u + (block (result i32) + (set_local $63 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $63) + (i32.const 1) + ) + ) + (get_local $63) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $64 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $64) + (i32.const 1) + ) + ) + (get_local $64) + ) + (i32.load8_u + (block (result i32) + (set_local $65 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $65) + (i32.const 1) + ) + ) + (get_local $65) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $66 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $66) + (i32.const 1) + ) + ) + (get_local $66) + ) + (i32.load8_u + (block (result i32) + (set_local $67 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $67) + (i32.const 1) + ) + ) + (get_local $67) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $68 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $68) + (i32.const 1) + ) + ) + (get_local $68) + ) + (i32.load8_u + (block (result i32) + (set_local $69 + (get_local $4) + 
) + (set_local $4 + (i32.add + (get_local $69) + (i32.const 1) + ) + ) + (get_local $69) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $70 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $70) + (i32.const 1) + ) + ) + (get_local $70) + ) + (i32.load8_u + (block (result i32) + (set_local $71 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $71) + (i32.const 1) + ) + ) + (get_local $71) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $72 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $72) + (i32.const 1) + ) + ) + (get_local $72) + ) + (i32.load8_u + (block (result i32) + (set_local $73 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $73) + (i32.const 1) + ) + ) + (get_local $73) + ) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 4) + ) + (block + (i32.store8 + (block (result i32) + (set_local $74 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $74) + (i32.const 1) + ) + ) + (get_local $74) + ) + (i32.load8_u + (block (result i32) + (set_local $75 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $75) + (i32.const 1) + ) + ) + (get_local $75) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $76 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $76) + (i32.const 1) + ) + ) + (get_local $76) + ) + (i32.load8_u + (block (result i32) + (set_local $77 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $77) + (i32.const 1) + ) + ) + (get_local $77) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $78 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $78) + (i32.const 1) + ) + ) + (get_local $78) + ) + (i32.load8_u + (block (result i32) + (set_local $79 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $79) + (i32.const 1) + ) + ) + (get_local $79) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $80 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $80) + (i32.const 
1) + ) + ) + (get_local $80) + ) + (i32.load8_u + (block (result i32) + (set_local $81 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $81) + (i32.const 1) + ) + ) + (get_local $81) + ) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 2) + ) + (block + (i32.store8 + (block (result i32) + (set_local $82 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $82) + (i32.const 1) + ) + ) + (get_local $82) + ) + (i32.load8_u + (block (result i32) + (set_local $83 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $83) + (i32.const 1) + ) + ) + (get_local $83) + ) + ) + ) + (i32.store8 + (block (result i32) + (set_local $84 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $84) + (i32.const 1) + ) + ) + (get_local $84) + ) + (i32.load8_u + (block (result i32) + (set_local $85 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $85) + (i32.const 1) + ) + ) + (get_local $85) + ) + ) + ) + ) + ) + (if + (i32.and + (get_local $2) + (i32.const 1) + ) + (i32.store8 + (block (result i32) + (set_local $86 + (get_local $3) + ) + (set_local $3 + (i32.add + (get_local $86) + (i32.const 1) + ) + ) + (get_local $86) + ) + (i32.load8_u + (block (result i32) + (set_local $87 + (get_local $4) + ) + (set_local $4 + (i32.add + (get_local $87) + (i32.const 1) + ) + ) + (get_local $87) + ) + ) + ) + ) + (return + (get_local $0) + ) + ) + (func $start (; 1 ;) (type $v) + (i64.store + (i32.const 8) + (i64.const 1229782938247303441) + ) + (i64.store + (i32.add + (i32.const 8) + (i32.const 8) + ) + (i64.const 2459565876494606882) + ) + (i64.store + (i32.add + (i32.const 8) + (i32.const 16) + ) + (i64.const 3689348814741910323) + ) + (i64.store + (i32.add + (i32.const 8) + (i32.const 24) + ) + (i64.const 4919131752989213764) + ) + (set_global $memcpy/dst + (call $memcpy/memcpy + (i32.add + (i32.const 8) + (i32.const 1) + ) + (i32.add + (i32.const 8) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.eq + 
(get_global $memcpy/dst) + (i32.add + (i32.const 8) + (i32.const 1) + ) + ) + ) + (unreachable) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.const 8) + ) + (i64.const 1229783084848853777) + ) + ) + (unreachable) + ) + (set_global $memcpy/dst + (call $memcpy/memcpy + (i32.const 8) + (i32.const 8) + (i32.const 32) + ) + ) + (if + (i32.eqz + (i32.eq + (get_global $memcpy/dst) + (i32.const 8) + ) + ) + (unreachable) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.const 8) + ) + (i64.const 1229783084848853777) + ) + ) + (unreachable) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.add + (i32.const 8) + (i32.const 8) + ) + ) + (i64.const 2459565876494606882) + ) + ) + (unreachable) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.add + (i32.const 8) + (i32.const 16) + ) + ) + (i64.const 3689348814741910323) + ) + ) + (unreachable) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.add + (i32.const 8) + (i32.const 24) + ) + ) + (i64.const 4919131752989213764) + ) + ) + (unreachable) + ) + (set_global $memcpy/dst + (call $memcpy/memcpy + (i32.add + (i32.const 8) + (i32.const 5) + ) + (i32.add + (i32.const 8) + (i32.const 28) + ) + (i32.const 3) + ) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.const 8) + ) + (i64.const 4919131679688438545) + ) + ) + (unreachable) + ) + (set_global $memcpy/dst + (call $memcpy/memcpy + (i32.add + (i32.const 8) + (i32.const 8) + ) + (i32.add + (i32.const 8) + (i32.const 16) + ) + (i32.const 15) + ) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.const 8) + ) + (i64.const 4919131679688438545) + ) + ) + (unreachable) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.add + (i32.const 8) + (i32.const 8) + ) + ) + (i64.const 3689348814741910323) + ) + ) + (unreachable) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.add + (i32.const 8) + (i32.const 16) + ) + ) + (i64.const 3694152654344438852) + ) + ) + (unreachable) + ) + (if + (i32.eqz + (i64.eq + (i64.load + (i32.add + (i32.const 8) + (i32.const 24) + ) + ) + (i64.const 
4919131752989213764) + ) + ) + (unreachable) + ) + ) +) +(; +[program.elements] + clz + ctz + popcnt + rotl + rotr + abs + ceil + copysign + floor + max + min + nearest + sqrt + trunc + current_memory + grow_memory + unreachable + load + store + reinterpret + select + sizeof + changetype + isNaN + isFinite + assert + memcpy/memcpy + memcpy/base + memcpy/dst +[program.exports] + memcpy/memcpy +;) diff --git a/tests/compiler/switch.optimized.wast b/tests/compiler/switch.optimized.wast index ab5bd232..094ada85 100644 --- a/tests/compiler/switch.optimized.wast +++ b/tests/compiler/switch.optimized.wast @@ -6,15 +6,15 @@ (export "doSwitchDefaultOmitted" (func $switch/doSwitchDefaultOmitted)) (export "memory" (memory $0)) (func $switch/doSwitch (; 0 ;) (type $ii) (param $0 i32) (result i32) - (block $case4|1.1 - (block $case2|1.1 - (block $case0|1.1 + (block $case4|0 + (block $case2|0 + (block $case0|0 (block $tablify|0 - (br_table $case2|1.1 $case0|1.1 $case4|1.1 $case4|1.1 $tablify|0 + (br_table $case2|0 $case0|0 $case4|0 $case4|0 $tablify|0 (get_local $0) ) ) - (br $case2|1.1) + (br $case2|0) ) (return (i32.const 1) @@ -27,10 +27,10 @@ (i32.const 23) ) (func $switch/doSwitchDefaultFirst (; 1 ;) (type $ii) (param $0 i32) (result i32) - (block $case3|1.1 - (block $case1|1.1 + (block $case3|0 + (block $case1|0 (block $tablify|0 - (br_table $case1|1.1 $case3|1.1 $case3|1.1 $tablify|0 + (br_table $case1|0 $case3|0 $case3|0 $tablify|0 (i32.sub (get_local $0) (i32.const 1) @@ -48,18 +48,18 @@ (i32.const 23) ) (func $switch/doSwitchDefaultOmitted (; 2 ;) (type $ii) (param $0 i32) (result i32) - (block $break|1.1 - (block $case2|1.1 - (block $case0|1.1 + (block $break|0 + (block $case2|0 + (block $case0|0 (block $tablify|0 - (br_table $case0|1.1 $case2|1.1 $case2|1.1 $tablify|0 + (br_table $case0|0 $case2|0 $case2|0 $tablify|0 (i32.sub (get_local $0) (i32.const 1) ) ) ) - (br $break|1.1) + (br $break|0) ) (return (i32.const 1) diff --git a/tests/compiler/switch.wast 
b/tests/compiler/switch.wast index 4ba127ee..474af8cc 100644 --- a/tests/compiler/switch.wast +++ b/tests/compiler/switch.wast @@ -8,40 +8,40 @@ (export "memory" (memory $0)) (func $switch/doSwitch (; 0 ;) (type $ii) (param $0 i32) (result i32) (local $1 i32) - (block $break|1.1 - (block $case4|1.1 - (block $case3|1.1 - (block $case2|1.1 - (block $case1|1.1 - (block $case0|1.1 + (block $break|0 + (block $case4|0 + (block $case3|0 + (block $case2|0 + (block $case1|0 + (block $case0|0 (set_local $1 (get_local $0) ) - (br_if $case0|1.1 + (br_if $case0|0 (i32.eq (get_local $1) (i32.const 1) ) ) - (br_if $case1|1.1 + (br_if $case1|0 (i32.eq (get_local $1) (i32.const 0) ) ) - (br_if $case3|1.1 + (br_if $case3|0 (i32.eq (get_local $1) (i32.const 2) ) ) - (br_if $case4|1.1 + (br_if $case4|0 (i32.eq (get_local $1) (i32.const 3) ) ) - (br $case2|1.1) + (br $case2|0) ) (return (i32.const 1) @@ -60,33 +60,33 @@ ) (func $switch/doSwitchDefaultFirst (; 1 ;) (type $ii) (param $0 i32) (result i32) (local $1 i32) - (block $break|1.1 - (block $case3|1.1 - (block $case2|1.1 - (block $case1|1.1 - (block $case0|1.1 + (block $break|0 + (block $case3|0 + (block $case2|0 + (block $case1|0 + (block $case0|0 (set_local $1 (get_local $0) ) - (br_if $case1|1.1 + (br_if $case1|0 (i32.eq (get_local $1) (i32.const 1) ) ) - (br_if $case2|1.1 + (br_if $case2|0 (i32.eq (get_local $1) (i32.const 2) ) ) - (br_if $case3|1.1 + (br_if $case3|0 (i32.eq (get_local $1) (i32.const 3) ) ) - (br $case0|1.1) + (br $case0|0) ) (return (i32.const 0) @@ -104,32 +104,32 @@ ) (func $switch/doSwitchDefaultOmitted (; 2 ;) (type $ii) (param $0 i32) (result i32) (local $1 i32) - (block $break|1.1 - (block $case2|1.1 - (block $case1|1.1 - (block $case0|1.1 + (block $break|0 + (block $case2|0 + (block $case1|0 + (block $case0|0 (set_local $1 (get_local $0) ) - (br_if $case0|1.1 + (br_if $case0|0 (i32.eq (get_local $1) (i32.const 1) ) ) - (br_if $case1|1.1 + (br_if $case1|0 (i32.eq (get_local $1) (i32.const 2) ) ) - 
(br_if $case2|1.1 + (br_if $case2|0 (i32.eq (get_local $1) (i32.const 3) ) ) - (br $break|1.1) + (br $break|0) ) (return (i32.const 1) diff --git a/tests/compiler/tlsf.optimized-inlined.wast b/tests/compiler/tlsf.optimized-inlined.wast index f2be9451..3bb1b2b7 100644 --- a/tests/compiler/tlsf.optimized-inlined.wast +++ b/tests/compiler/tlsf.optimized-inlined.wast @@ -131,7 +131,7 @@ (set_local $1 (i32.const 0) ) - (loop $continue|1.1 + (loop $continue|0 (if (i32.lt_s (get_local $1) @@ -176,7 +176,7 @@ (set_local $2 (i32.const 0) ) - (loop $continue|1.2 + (loop $continue|1 (if (i32.lt_s (get_local $2) @@ -195,7 +195,7 @@ (i32.const 1) ) ) - (br $continue|1.2) + (br $continue|1) ) ) ) @@ -205,7 +205,7 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) diff --git a/tests/compiler/tlsf.optimized.wast b/tests/compiler/tlsf.optimized.wast index 39c14039..89df28f2 100644 --- a/tests/compiler/tlsf.optimized.wast +++ b/tests/compiler/tlsf.optimized.wast @@ -132,7 +132,7 @@ (set_local $1 (i32.const 0) ) - (loop $continue|1.1 + (loop $continue|0 (if (i32.lt_s (get_local $1) @@ -147,7 +147,7 @@ (set_local $2 (i32.const 0) ) - (loop $continue|1.2 + (loop $continue|1 (if (i32.lt_s (get_local $2) @@ -166,7 +166,7 @@ (i32.const 1) ) ) - (br $continue|1.2) + (br $continue|1) ) ) ) @@ -176,7 +176,7 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) diff --git a/tests/compiler/tlsf.wast b/tests/compiler/tlsf.wast index 7abebab1..1580a8ca 100644 --- a/tests/compiler/tlsf.wast +++ b/tests/compiler/tlsf.wast @@ -166,13 +166,13 @@ (get_local $0) (i32.const 0) ) - (block $break|1.1 + (block $break|0 (block (set_local $1 (i32.const 0) ) ) - (loop $continue|1.1 + (loop $continue|0 (if (i32.lt_s (get_local $1) @@ -185,13 +185,13 @@ (get_local $1) (i32.const 0) ) - (block $break|1.2 + (block $break|1 (block (set_local $2 (i32.const 0) ) ) - (loop $continue|1.2 + (loop $continue|1 (if (i32.lt_s (get_local $2) @@ -210,7 +210,7 @@ (i32.const 1) ) ) - (br 
$continue|1.2) + (br $continue|1) ) ) ) @@ -222,7 +222,7 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) diff --git a/tests/compiler/while.optimized.wast b/tests/compiler/while.optimized.wast index 2827adaa..d7dfbaf4 100644 --- a/tests/compiler/while.optimized.wast +++ b/tests/compiler/while.optimized.wast @@ -5,7 +5,7 @@ (export "loopWhileInWhile" (func $while/loopWhileInWhile)) (export "memory" (memory $0)) (func $while/loopWhile (; 0 ;) (type $iv) (param $0 i32) - (loop $continue|1.1 + (loop $continue|0 (if (get_local $0) (block @@ -15,13 +15,13 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) ) (func $while/loopWhileInWhile (; 1 ;) (type $iv) (param $0 i32) - (loop $continue|1.1 + (loop $continue|0 (if (get_local $0) (block @@ -31,7 +31,7 @@ (i32.const 1) ) ) - (loop $continue|1.2 + (loop $continue|1 (if (get_local $0) (block @@ -41,11 +41,11 @@ (i32.const 1) ) ) - (br $continue|1.2) + (br $continue|1) ) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) diff --git a/tests/compiler/while.wast b/tests/compiler/while.wast index ac827a47..65d9f503 100644 --- a/tests/compiler/while.wast +++ b/tests/compiler/while.wast @@ -6,8 +6,8 @@ (export "loopWhileInWhile" (func $while/loopWhileInWhile)) (export "memory" (memory $0)) (func $while/loopWhile (; 0 ;) (type $iv) (param $0 i32) - (block $break|1.1 - (loop $continue|1.1 + (block $break|0 + (loop $continue|0 (if (get_local $0) (block @@ -17,15 +17,15 @@ (i32.const 1) ) ) - (br $continue|1.1) + (br $continue|0) ) ) ) ) ) (func $while/loopWhileInWhile (; 1 ;) (type $iv) (param $0 i32) - (block $break|1.1 - (loop $continue|1.1 + (block $break|0 + (loop $continue|0 (if (get_local $0) (block @@ -36,8 +36,8 @@ (i32.const 1) ) ) - (block $break|1.2 - (loop $continue|1.2 + (block $break|1 + (loop $continue|1 (if (get_local $0) (block @@ -47,13 +47,13 @@ (i32.const 1) ) ) - (br $continue|1.2) + (br $continue|1) ) ) ) ) ) - (br $continue|1.1) + (br $continue|0) ) ) )