From a742537dd9b2a02ca85e8b1acdf9618116932d03 Mon Sep 17 00:00:00 2001 From: dcode Date: Thu, 28 Mar 2019 22:49:22 +0100 Subject: [PATCH] split into encoders and decoders --- std/assembly/encoding.ts | 119 ++++++--- std/assembly/index.d.ts | 37 ++- std/assembly/string.ts | 1 - tests/compiler/number.optimized.wat | 2 +- tests/compiler/number.untouched.wat | 2 +- tests/compiler/std/array-access.optimized.wat | 2 +- tests/compiler/std/array-access.untouched.wat | 2 +- tests/compiler/std/array.optimized.wat | 2 +- tests/compiler/std/array.untouched.wat | 4 +- tests/compiler/std/encoding.optimized.wat | 111 ++++++--- tests/compiler/std/encoding.ts | 48 ++-- tests/compiler/std/encoding.untouched.wat | 231 +++++++++++------- tests/compiler/std/string.optimized.wat | 24 +- tests/compiler/std/string.untouched.wat | 24 +- 14 files changed, 378 insertions(+), 231 deletions(-) diff --git a/std/assembly/encoding.ts b/std/assembly/encoding.ts index 8915f11a..42bde1be 100644 --- a/std/assembly/encoding.ts +++ b/std/assembly/encoding.ts @@ -1,46 +1,71 @@ import { ALLOCATE, REGISTER, REALLOCATE, MAX_BYTELENGTH } from "./runtime"; -import { E_INVALIDLENGTH } from "./util/error"; +import { E_INVALIDLENGTH, E_NOTIMPLEMENTED } from "./util/error"; -/** UTF16 encoding. */ -export namespace UTF16 { +export class UTF16Encoder { - /** Calculates the length of a string when encoded as an UTF16 buffer. */ - export function length(str: string): i32 { + /** Calculates the length of a string when encoded as UTF16 bytes. */ + static byteLength(str: string): i32 { return str.length << 1; } - /** Encodes a string as an UTF16 buffer. */ - export function encode(str: string): ArrayBuffer { + /** Encodes a string to UTF16 bytes. */ + static encode(str: string): ArrayBuffer { var size = str.length << 1; var buf = ALLOCATE(size); memory.copy(buf, changetype(str), size); return REGISTER(buf); } - /** Decodes an UTF16 buffer to a string.*/ - export function decode(buf: ArrayBuffer): string { - return decodeRaw(changetype(buf), buf.byteLength); + constructor() { + throw new Error(E_NOTIMPLEMENTED); } - // @ts-ignore: decorator + write(str: string): void { + throw new Error(E_NOTIMPLEMENTED); + } + + end(): ArrayBuffer { + throw new Error(E_NOTIMPLEMENTED); + } +} + +export class UTF16Decoder { + + /** Decodes UTF16 bytes to a string.*/ + static decode(buf: ArrayBuffer): string { + return UTF16Decoder.decodeUnsafe(changetype(buf), buf.byteLength); + } + + /** Decodes UTF16 bytes to a string. */ @unsafe - export function decodeRaw(buf: usize, len: i32): string { + static decodeUnsafe(buf: usize, len: i32): string { if (len > MAX_BYTELENGTH) throw new RangeError(E_INVALIDLENGTH); var size = len; var str = ALLOCATE(size); memory.copy(str, changetype(buf), size); return REGISTER(str); } + + constructor() { + throw new Error(E_NOTIMPLEMENTED); + } + + write(buf: ArrayBuffer): void { + throw new Error(E_NOTIMPLEMENTED); + } + + end(): string { + throw new Error(E_NOTIMPLEMENTED); + } } -/** UTF8 encoding. */ -export namespace UTF8 { +export class UTF8Encoder { - /** Calculates the length of a string when encoded as an UTF8 buffer. */ - export function length(str: string, delimited: bool = false): i32 { + /** Calculates the length of a string when encoded as UTF8 bytes. */ + static byteLength(str: string, nullTerminated: bool = false): i32 { var strOff = changetype(str); var strEnd = changetype(str) + (str.length << 1); - var bufLen = delimited ? 1 : 0; + var bufLen = nullTerminated ? 1 : 0; while (strOff < strEnd) { let c = load(strOff); if (c < 128) { @@ -60,11 +85,11 @@ export namespace UTF8 { return bufLen; } - /** Encodes a string as an UTF8 buffer. */ - export function encode(str: string, delimited: bool = false): ArrayBuffer { + /** Encodes a string as UTF8 bytes. */ + static encode(str: string, nullTerminated: bool = false): ArrayBuffer { var strOff = changetype(str); var strEnd = changetype(str) + (str.length << 1); - var buf = ALLOCATE(length(str, delimited)); + var buf = ALLOCATE(UTF8Encoder.byteLength(str, nullTerminated)); var bufOff = changetype(buf); while (strOff < strEnd) { let c1 = load(strOff); @@ -95,22 +120,39 @@ export namespace UTF8 { } } assert(strOff == strEnd); - if (delimited) store(bufOff, 0); + if (nullTerminated) store(bufOff, 0); return REGISTER(buf); } - /** Decodes an UTF8 buffer to a string.*/ - export function decode(buf: ArrayBuffer, delimited: bool = false): string { - return delimited - ? decodeRawDelimited(changetype(buf), buf.byteLength) - : decodeRaw(changetype(buf), buf.byteLength); + constructor() { + throw new Error(E_NOTIMPLEMENTED); } - // @ts-ignore: decorator + write(str: string): void { + throw new Error(E_NOTIMPLEMENTED); + } + + end(): ArrayBuffer { + throw new Error(E_NOTIMPLEMENTED); + } +} + +export class UTF8Decoder { + + /** Decodes UTF8 bytes to a string.*/ + static decode(buf: ArrayBuffer, nullTerminated: bool = false): string { + return nullTerminated + ? UTF8Decoder.decodeNullTerminatedUnsafe(changetype(buf), buf.byteLength) + : UTF8Decoder.decodeUnsafe(changetype(buf), buf.byteLength); + } + + /** Decodes UTF8 bytes to a string.*/ @unsafe - export function decodeRaw(buf: usize, len: i32): string { + static decodeUnsafe(buf: usize, len: i32): string { + if (len > MAX_BYTELENGTH) throw new RangeError(E_INVALIDLENGTH); var bufOff = buf; var bufEnd = buf + len; + assert(bufEnd >= bufOff); // guard wraparound var str = ALLOCATE(len << 1); // max is one u16 char per u8 byte var strOff = str; while (bufOff < bufEnd) { @@ -144,13 +186,14 @@ export namespace UTF8 { return REGISTER(REALLOCATE(str, strOff - str)); } - // @ts-ignore: decorator + /** Decodes UTF8 bytes to a string. Zero terminated. */ @unsafe - export function decodeRawDelimited(buf: usize, maxLen: i32 = MAX_BYTELENGTH): string { + static decodeNullTerminatedUnsafe(buf: usize, maxLen: i32 = MAX_BYTELENGTH): string { + if (maxLen > MAX_BYTELENGTH) throw new RangeError(E_INVALIDLENGTH); var bufOff = buf; var bufLim = buf + maxLen; - assert(bufLim > bufOff); // guard wraparound - var str = ALLOCATE(16); // optimize for small strings + assert(bufLim >= bufOff); // guard wraparound + var str = ALLOCATE(min(maxLen, 16)); // optimize for small strings var strLen = 0; while (bufOff < bufLim) { let cp = load(bufOff++); @@ -191,4 +234,16 @@ export namespace UTF8 { } return REGISTER(REALLOCATE(str, strLen)); } + + constructor() { + throw new Error(E_NOTIMPLEMENTED); + } + + write(buf: ArrayBuffer): void { + throw new Error(E_NOTIMPLEMENTED); + } + + end(): string { + throw new Error(E_NOTIMPLEMENTED); + } } diff --git a/std/assembly/index.d.ts b/std/assembly/index.d.ts index ffd00a96..af8bb084 100644 --- a/std/assembly/index.d.ts +++ b/std/assembly/index.d.ts @@ -1254,19 +1254,34 @@ declare class String { toString(): string; } -declare namespace UTF16 { - export function length(str: string): i32; - export function encode(str: string): ArrayBuffer; - export function decode(buf: ArrayBuffer, delimited?: bool): string; - export function decodeRaw(buf: usize, len: i32): string; // unsafe +declare class UTF16Encoder { + /** Calculates the length of a string when encoded as UTF16 bytes. */ + static byteLength(str: string): i32; + /** Encodes a string to UTF16 bytes. */ + static encode(str: string): ArrayBuffer; } -declare namespace UTF8 { - export function length(str: string, delimited?: bool): i32; - export function encode(str: string, delimited?: bool): ArrayBuffer; - export function decode(buf: ArrayBuffer, delimited?: bool): string; - export function decodeRaw(buf: usize, len: i32): string; // unsafe - export function decodeRawDelimited(buf: usize, maxLen?: i32): string; // unsafe +declare class UTF16Decoder { + /** Decodes UTF16 bytes to a string.*/ + static decode(buf: ArrayBuffer): string; + /** Decodes UTF16 bytes to a string. */ + static decodeUnsafe(buf: usize, len: i32): string; +} + +declare class UTF8Encoder { + /** Calculates the length of a string when encoded as UTF8 bytes. */ + static byteLength(str: string, nullTerminated?: bool): i32; + /** Encodes a string as UTF8 bytes. */ + static encode(str: string, nullTerminated?: bool): ArrayBuffer; +} + +declare class UTF8Decoder { + /** Decodes UTF8 bytes to a string.*/ + static decode(buf: ArrayBuffer, nullTerminated?: bool): string; + /** Decodes UTF8 bytes to a string.*/ + static decodeUnsafe(buf: usize, len: i32): string; + /** Decodes UTF8 bytes to a string. */ + static decodeNullTerminatedUnsafe(buf: usize, maxLen?: i32): string; } /** Class for representing a runtime error. Base class of all errors. */ diff --git a/std/assembly/string.ts b/std/assembly/string.ts index 34c18a13..0acbfe33 100644 --- a/std/assembly/string.ts +++ b/std/assembly/string.ts @@ -4,7 +4,6 @@ import { ALLOCATE, REGISTER, HEADER, HEADER_SIZE, MAKEARRAY, ArrayBufferView } f import { MAX_SIZE_32 } from "./util/allocator"; import { compareImpl, parse, CharCode, isWhiteSpaceOrLineTerminator } from "./util/string"; import { E_INVALIDLENGTH } from "./util/error"; -import { UTF8 } from "./encoding"; @sealed export abstract class String { diff --git a/tests/compiler/number.optimized.wat b/tests/compiler/number.optimized.wat index c1b84a5d..89aa7586 100644 --- a/tests/compiler/number.optimized.wat +++ b/tests/compiler/number.optimized.wat @@ -2358,7 +2358,7 @@ if i32.const 0 i32.const 1648 - i32.const 190 + i32.const 189 i32.const 4 call $~lib/env/abort unreachable diff --git a/tests/compiler/number.untouched.wat b/tests/compiler/number.untouched.wat index 27d211e8..3032067b 100644 --- a/tests/compiler/number.untouched.wat +++ b/tests/compiler/number.untouched.wat @@ -3424,7 +3424,7 @@ if i32.const 0 i32.const 1648 - i32.const 190 + i32.const 189 i32.const 4 call $~lib/env/abort unreachable diff --git a/tests/compiler/std/array-access.optimized.wat b/tests/compiler/std/array-access.optimized.wat index 7e9cbe25..c1ffc30d 100644 --- a/tests/compiler/std/array-access.optimized.wat +++ b/tests/compiler/std/array-access.optimized.wat @@ -142,7 +142,7 @@ if i32.const 0 i32.const 64 - i32.const 165 + i32.const 164 i32.const 4 call $~lib/env/abort unreachable diff --git a/tests/compiler/std/array-access.untouched.wat b/tests/compiler/std/array-access.untouched.wat index e65829fd..2d16c9f8 100644 --- a/tests/compiler/std/array-access.untouched.wat +++ b/tests/compiler/std/array-access.untouched.wat @@ -219,7 +219,7 @@ if i32.const 0 i32.const 64 - i32.const 165 + i32.const 164 i32.const 4 call $~lib/env/abort unreachable diff --git a/tests/compiler/std/array.optimized.wat b/tests/compiler/std/array.optimized.wat index 9bc32116..5ce64865 100644 --- a/tests/compiler/std/array.optimized.wat +++ b/tests/compiler/std/array.optimized.wat @@ -6229,7 +6229,7 @@ if i32.const 0 i32.const 4376 - i32.const 190 + i32.const 189 i32.const 4 call $~lib/env/abort unreachable diff --git a/tests/compiler/std/array.untouched.wat b/tests/compiler/std/array.untouched.wat index 4cfd5bf9..6c0bc573 100644 --- a/tests/compiler/std/array.untouched.wat +++ b/tests/compiler/std/array.untouched.wat @@ -8951,7 +8951,7 @@ if i32.const 0 i32.const 4376 - i32.const 40 + i32.const 39 i32.const 4 call $~lib/env/abort unreachable @@ -9590,7 +9590,7 @@ if i32.const 0 i32.const 4376 - i32.const 190 + i32.const 189 i32.const 4 call $~lib/env/abort unreachable diff --git a/tests/compiler/std/encoding.optimized.wat b/tests/compiler/std/encoding.optimized.wat index a16df4a6..07ce2de5 100644 --- a/tests/compiler/std/encoding.optimized.wat +++ b/tests/compiler/std/encoding.optimized.wat @@ -25,7 +25,7 @@ (export "memory" (memory $0)) (export "table" (table $0)) (start $start) - (func $~lib/encoding/UTF8.length (; 1 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Encoder.byteLength (; 1 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) local.get $0 @@ -119,7 +119,7 @@ (func $std/encoding/testUTF8Length (; 2 ;) (type $FUNCSIG$v) global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.length + call $~lib/encoding/UTF8Encoder.byteLength i32.const 10 i32.ne if @@ -132,7 +132,7 @@ end global.get $std/encoding/str i32.const 1 - call $~lib/encoding/UTF8.length + call $~lib/encoding/UTF8Encoder.byteLength i32.const 11 i32.ne if @@ -260,7 +260,7 @@ i32.store local.get $0 ) - (func $~lib/encoding/UTF8.encode (; 6 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Encoder.encode (; 6 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) (local $4 i32) @@ -279,7 +279,7 @@ local.set $4 local.get $0 local.get $1 - call $~lib/encoding/UTF8.length + call $~lib/encoding/UTF8Encoder.byteLength call $~lib/runtime/allocate local.tee $6 local.set $2 @@ -443,7 +443,7 @@ if i32.const 0 i32.const 80 - i32.const 97 + i32.const 122 i32.const 4 call $~lib/env/abort unreachable @@ -462,7 +462,7 @@ (local $0 i32) global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode local.tee $0 i32.const 8 i32.sub @@ -598,11 +598,11 @@ unreachable end ) - (func $std/encoding/testUTF8EncodeDelimited (; 8 ;) (type $FUNCSIG$v) + (func $std/encoding/testUTF8EncodeNullTerminated (; 8 ;) (type $FUNCSIG$v) (local $0 i32) global.get $std/encoding/str i32.const 1 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode local.tee $0 i32.const 8 i32.sub @@ -2096,26 +2096,42 @@ i32.store offset=4 local.get $0 ) - (func $~lib/encoding/UTF8.decodeRawDelimited (; 13 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe (; 13 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) (local $4 i32) (local $5 i32) + local.get $1 + i32.const 1073741816 + i32.gt_u + if + i32.const 0 + i32.const 80 + i32.const 192 + i32.const 47 + call $~lib/env/abort + unreachable + end local.get $0 local.get $1 i32.add local.tee $5 local.get $0 - i32.le_u + i32.lt_u if i32.const 0 i32.const 80 - i32.const 152 + i32.const 195 i32.const 4 call $~lib/env/abort unreachable end + local.get $1 i32.const 16 + local.get $1 + i32.const 16 + i32.lt_s + select call $~lib/runtime/allocate local.set $4 loop $continue|0 @@ -2326,15 +2342,36 @@ i32.const 1 call $~lib/runtime/register ) - (func $~lib/encoding/UTF8.decodeRaw (; 14 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Decoder.decodeUnsafe (; 14 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) (local $4 i32) (local $5 i32) + local.get $1 + i32.const 1073741816 + i32.gt_u + if + i32.const 0 + i32.const 80 + i32.const 152 + i32.const 44 + call $~lib/env/abort + unreachable + end local.get $0 local.get $1 i32.add - local.set $5 + local.tee $5 + local.get $0 + i32.lt_u + if + i32.const 0 + i32.const 80 + i32.const 155 + i32.const 4 + call $~lib/env/abort + unreachable + end local.get $1 i32.const 1 i32.shl @@ -2500,7 +2537,7 @@ i32.const 1 call $~lib/runtime/register ) - (func $~lib/encoding/UTF8.decode (; 15 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Decoder.decode (; 15 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) local.get $1 if (result i32) local.get $0 @@ -2508,14 +2545,14 @@ i32.const 8 i32.sub i32.load offset=4 - call $~lib/encoding/UTF8.decodeRawDelimited + call $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe else local.get $0 local.get $0 i32.const 8 i32.sub i32.load offset=4 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe end ) (func $~lib/util/string/compareImpl (; 16 ;) (type $FUNCSIG$iiii) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) @@ -2600,9 +2637,9 @@ (func $std/encoding/testUTF8Decode (; 18 ;) (type $FUNCSIG$v) global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode i32.const 0 - call $~lib/encoding/UTF8.decode + call $~lib/encoding/UTF8Decoder.decode global.get $std/encoding/str call $~lib/string/String.__eq i32.eqz @@ -2615,12 +2652,12 @@ unreachable end ) - (func $std/encoding/testUTF8DecodeDelimited (; 19 ;) (type $FUNCSIG$v) + (func $std/encoding/testUTF8DecodeNullTerminated (; 19 ;) (type $FUNCSIG$v) global.get $std/encoding/str i32.const 1 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode i32.const 1 - call $~lib/encoding/UTF8.decode + call $~lib/encoding/UTF8Decoder.decode global.get $std/encoding/str call $~lib/string/String.__eq i32.eqz @@ -2633,14 +2670,14 @@ unreachable end ) - (func $std/encoding/testUTF8Raw (; 20 ;) (type $FUNCSIG$v) + (func $std/encoding/testUTF8DecodeUnsafe (; 20 ;) (type $FUNCSIG$v) (local $0 i32) global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode local.tee $0 i32.const 0 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 160 call $~lib/string/String.__eq i32.eqz @@ -2655,8 +2692,8 @@ local.get $0 global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.length - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Encoder.byteLength + call $~lib/encoding/UTF8Decoder.decodeUnsafe global.get $std/encoding/str call $~lib/string/String.__eq i32.eqz @@ -2670,7 +2707,7 @@ end local.get $0 i32.const 4 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 168 call $~lib/string/String.__eq i32.eqz @@ -2686,7 +2723,7 @@ i32.const 4 i32.add i32.const 2 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 184 call $~lib/string/String.__eq i32.eqz @@ -2702,7 +2739,7 @@ i32.const 6 i32.add i32.const 4 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 200 call $~lib/string/String.__eq i32.eqz @@ -2718,7 +2755,7 @@ i32.const 10 i32.add i32.const 0 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 160 call $~lib/string/String.__eq i32.eqz @@ -2737,7 +2774,7 @@ i32.const 4 i32.add i32.const 1073741816 - call $~lib/encoding/UTF8.decodeRawDelimited + call $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe i32.const 216 call $~lib/string/String.__eq i32.eqz @@ -2753,7 +2790,7 @@ i32.const 6 i32.add i32.const 1073741816 - call $~lib/encoding/UTF8.decodeRawDelimited + call $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe i32.const 200 call $~lib/string/String.__eq i32.eqz @@ -2769,7 +2806,7 @@ i32.const 10 i32.add i32.const 1073741816 - call $~lib/encoding/UTF8.decodeRawDelimited + call $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe i32.const 160 call $~lib/string/String.__eq i32.eqz @@ -2789,10 +2826,10 @@ global.get $~lib/allocator/arena/startOffset global.set $~lib/allocator/arena/offset call $std/encoding/testUTF8Encode - call $std/encoding/testUTF8EncodeDelimited + call $std/encoding/testUTF8EncodeNullTerminated call $std/encoding/testUTF8Decode - call $std/encoding/testUTF8DecodeDelimited - call $std/encoding/testUTF8Raw + call $std/encoding/testUTF8DecodeNullTerminated + call $std/encoding/testUTF8DecodeUnsafe ) (func $null (; 22 ;) (type $FUNCSIG$v) nop diff --git a/tests/compiler/std/encoding.ts b/tests/compiler/std/encoding.ts index a2cc80b2..6ea0d2a3 100644 --- a/tests/compiler/std/encoding.ts +++ b/tests/compiler/std/encoding.ts @@ -3,13 +3,13 @@ import "allocator/arena"; var str = "𐐷hi𤭢"; // -> f0 90 90 b7 68 69 f0 a4 ad a2 [00] function testUTF8Length(): void { - assert(UTF8.length(str) == 10); - assert(UTF8.length(str, true) == 11); + assert(UTF8Encoder.byteLength(str) == 10); + assert(UTF8Encoder.byteLength(str, true) == 11); } testUTF8Length(); function testUTF8Encode(): void { - var buf = UTF8.encode(str); + var buf = UTF8Encoder.encode(str); var ptr = changetype(buf); assert(buf.byteLength == 10); assert(load(ptr, 0) == 0xf0); @@ -25,8 +25,8 @@ function testUTF8Encode(): void { } testUTF8Encode(); -function testUTF8EncodeDelimited(): void { - var buf = UTF8.encode(str, true); +function testUTF8EncodeNullTerminated(): void { + var buf = UTF8Encoder.encode(str, true); var ptr = changetype(buf); assert(buf.byteLength == 11); assert(load(ptr, 0) == 0xf0); @@ -41,33 +41,33 @@ function testUTF8EncodeDelimited(): void { assert(load(ptr, 9) == 0xa2); assert(load(ptr, 10) == 0x00); } -testUTF8EncodeDelimited(); +testUTF8EncodeNullTerminated(); function testUTF8Decode(): void { - var buf = UTF8.encode(str); - assert(UTF8.decode(buf) == str); + var buf = UTF8Encoder.encode(str); + assert(UTF8Decoder.decode(buf) == str); } testUTF8Decode(); -function testUTF8DecodeDelimited(): void { - var buf = UTF8.encode(str, true); - assert(UTF8.decode(buf, true) == str); +function testUTF8DecodeNullTerminated(): void { + var buf = UTF8Encoder.encode(str, true); + assert(UTF8Decoder.decode(buf, true) == str); } -testUTF8DecodeDelimited(); +testUTF8DecodeNullTerminated(); -function testUTF8Raw(): void { - var buf = changetype(UTF8.encode(str)); +function testUTF8DecodeUnsafe(): void { + var buf = changetype(UTF8Encoder.encode(str)); - assert(UTF8.decodeRaw(buf, 0) == ""); - assert(UTF8.decodeRaw(buf, UTF8.length(str)) == str); - assert(UTF8.decodeRaw(buf, 4) == "𐐷"); - assert(UTF8.decodeRaw(buf + 4, 2) == "hi"); - assert(UTF8.decodeRaw(buf + 6, 4) == "𤭢"); - assert(UTF8.decodeRaw(buf + 10, 0) == ""); + assert(UTF8Decoder.decodeUnsafe(buf, 0) == ""); + assert(UTF8Decoder.decodeUnsafe(buf, UTF8Encoder.byteLength(str)) == str); + assert(UTF8Decoder.decodeUnsafe(buf, 4) == "𐐷"); + assert(UTF8Decoder.decodeUnsafe(buf + 4, 2) == "hi"); + assert(UTF8Decoder.decodeUnsafe(buf + 6, 4) == "𤭢"); + assert(UTF8Decoder.decodeUnsafe(buf + 10, 0) == ""); store(buf, 0, 10); // don't try this at home - assert(UTF8.decodeRawDelimited(buf + 4) == "hi𤭢"); - assert(UTF8.decodeRawDelimited(buf + 6) == "𤭢"); - assert(UTF8.decodeRawDelimited(buf + 10) == ""); + assert(UTF8Decoder.decodeNullTerminatedUnsafe(buf + 4) == "hi𤭢"); + assert(UTF8Decoder.decodeNullTerminatedUnsafe(buf + 6) == "𤭢"); + assert(UTF8Decoder.decodeNullTerminatedUnsafe(buf + 10) == ""); } -testUTF8Raw(); +testUTF8DecodeUnsafe(); diff --git a/tests/compiler/std/encoding.untouched.wat b/tests/compiler/std/encoding.untouched.wat index 3180ad1a..031d7f71 100644 --- a/tests/compiler/std/encoding.untouched.wat +++ b/tests/compiler/std/encoding.untouched.wat @@ -38,7 +38,7 @@ i32.const 1 i32.shr_u ) - (func $~lib/encoding/UTF8.length (; 2 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Encoder.byteLength (; 2 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) (local $4 i32) @@ -152,7 +152,7 @@ (func $std/encoding/testUTF8Length (; 3 ;) (type $FUNCSIG$v) global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.length + call $~lib/encoding/UTF8Encoder.byteLength i32.const 10 i32.eq i32.eqz @@ -166,7 +166,7 @@ end global.get $std/encoding/str i32.const 1 - call $~lib/encoding/UTF8.length + call $~lib/encoding/UTF8Encoder.byteLength i32.const 11 i32.eq i32.eqz @@ -327,7 +327,7 @@ i32.store local.get $0 ) - (func $~lib/encoding/UTF8.encode (; 9 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Encoder.encode (; 9 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) (local $4 i32) @@ -346,7 +346,7 @@ block $~lib/runtime/ALLOCATE|inlined.0 (result i32) local.get $0 local.get $1 - call $~lib/encoding/UTF8.length + call $~lib/encoding/UTF8Encoder.byteLength local.set $4 local.get $4 call $~lib/runtime/allocate @@ -532,7 +532,7 @@ if i32.const 0 i32.const 80 - i32.const 97 + i32.const 122 i32.const 4 call $~lib/env/abort unreachable @@ -564,7 +564,7 @@ (local $1 i32) global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode local.set $0 local.get $0 local.set $1 @@ -712,12 +712,12 @@ unreachable end ) - (func $std/encoding/testUTF8EncodeDelimited (; 12 ;) (type $FUNCSIG$v) + (func $std/encoding/testUTF8EncodeNullTerminated (; 12 ;) (type $FUNCSIG$v) (local $0 i32) (local $1 i32) global.get $std/encoding/str i32.const 1 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode local.set $0 local.get $0 local.set $1 @@ -2671,7 +2671,7 @@ i32.store offset=4 local.get $0 ) - (func $~lib/encoding/UTF8.decodeRawDelimited (; 19 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe (; 19 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) (local $4 i32) @@ -2679,6 +2679,17 @@ (local $6 i32) (local $7 i32) (local $8 i32) + local.get $1 + global.get $~lib/runtime/MAX_BYTELENGTH + i32.gt_u + if + i32.const 0 + i32.const 80 + i32.const 192 + i32.const 47 + call $~lib/env/abort + unreachable + end local.get $0 local.set $2 local.get $0 @@ -2687,25 +2698,32 @@ local.set $3 local.get $3 local.get $2 - i32.gt_u + i32.ge_u i32.eqz if i32.const 0 i32.const 80 - i32.const 152 + i32.const 195 i32.const 4 call $~lib/env/abort unreachable end block $~lib/runtime/ALLOCATE|inlined.1 (result i32) + local.get $1 + local.tee $5 i32.const 16 + local.tee $6 + local.get $5 + local.get $6 + i32.lt_s + select local.set $4 local.get $4 call $~lib/runtime/allocate end - local.set $5 + local.set $7 i32.const 0 - local.set $6 + local.set $8 block $break|0 loop $continue|0 local.get $2 @@ -2733,37 +2751,37 @@ br $break|0 end block $~lib/runtime/REALLOCATE|inlined.0 (result i32) - local.get $5 - local.set $8 - local.get $6 + local.get $7 + local.set $6 + local.get $8 i32.const 2 i32.add - local.set $7 - local.get $8 - local.get $7 + local.set $5 + local.get $6 + local.get $5 call $~lib/runtime/reallocate end - local.set $5 - local.get $5 - local.get $6 + local.set $7 + local.get $7 + local.get $8 i32.add local.get $4 i32.store16 - local.get $6 + local.get $8 i32.const 2 i32.add - local.set $6 + local.set $8 else local.get $4 i32.const 191 i32.gt_u - local.tee $7 + local.tee $5 if (result i32) local.get $4 i32.const 224 i32.lt_u else - local.get $7 + local.get $5 end if local.get $2 @@ -2773,19 +2791,19 @@ br $break|0 end block $~lib/runtime/REALLOCATE|inlined.1 (result i32) - local.get $5 - local.set $8 - local.get $6 + local.get $7 + local.set $6 + local.get $8 i32.const 2 i32.add - local.set $7 - local.get $8 - local.get $7 + local.set $5 + local.get $6 + local.get $5 call $~lib/runtime/reallocate end - local.set $5 - local.get $5 - local.get $6 + local.set $7 + local.get $7 + local.get $8 i32.add local.get $4 i32.const 31 @@ -2794,32 +2812,32 @@ i32.shl block (result i32) local.get $2 - local.tee $7 + local.tee $5 i32.const 1 i32.add local.set $2 - local.get $7 + local.get $5 end i32.load8_u i32.const 63 i32.and i32.or i32.store16 - local.get $6 + local.get $8 i32.const 2 i32.add - local.set $6 + local.set $8 else local.get $4 i32.const 239 i32.gt_u - local.tee $7 + local.tee $5 if (result i32) local.get $4 i32.const 365 i32.lt_u else - local.get $7 + local.get $5 end if local.get $2 @@ -2862,39 +2880,39 @@ i32.add local.set $2 block $~lib/runtime/REALLOCATE|inlined.2 (result i32) - local.get $5 - local.set $8 - local.get $6 + local.get $7 + local.set $6 + local.get $8 i32.const 4 i32.add - local.set $7 - local.get $8 - local.get $7 + local.set $5 + local.get $6 + local.get $5 call $~lib/runtime/reallocate end - local.set $5 - local.get $5 - local.get $6 - i32.add local.set $7 local.get $7 + local.get $8 + i32.add + local.set $5 + local.get $5 i32.const 55296 local.get $4 i32.const 10 i32.shr_u i32.add i32.store16 - local.get $7 + local.get $5 i32.const 56320 local.get $4 i32.const 1023 i32.and i32.add i32.store16 offset=2 - local.get $6 + local.get $8 i32.const 4 i32.add - local.set $6 + local.set $8 else local.get $2 i32.const 2 @@ -2905,19 +2923,19 @@ br $break|0 end block $~lib/runtime/REALLOCATE|inlined.3 (result i32) - local.get $5 - local.set $8 - local.get $6 + local.get $7 + local.set $6 + local.get $8 i32.const 2 i32.add - local.set $7 - local.get $8 - local.get $7 + local.set $5 + local.get $6 + local.get $5 call $~lib/runtime/reallocate end - local.set $5 - local.get $5 - local.get $6 + local.set $7 + local.get $7 + local.get $8 i32.add local.get $4 i32.const 15 @@ -2941,10 +2959,10 @@ i32.const 2 i32.add local.set $2 - local.get $6 + local.get $8 i32.const 2 i32.add - local.set $6 + local.set $8 end end end @@ -2955,12 +2973,12 @@ end block $~lib/runtime/REGISTER<~lib/string/String>|inlined.0 (result i32) block $~lib/runtime/REALLOCATE|inlined.4 (result i32) - local.get $5 - local.set $8 - local.get $6 - local.set $7 - local.get $8 local.get $7 + local.set $6 + local.get $8 + local.set $5 + local.get $6 + local.get $5 call $~lib/runtime/reallocate end local.set $4 @@ -2969,7 +2987,7 @@ call $~lib/runtime/register end ) - (func $~lib/encoding/UTF8.decodeRaw (; 20 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Decoder.decodeUnsafe (; 20 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) (local $4 i32) @@ -2977,12 +2995,35 @@ (local $6 i32) (local $7 i32) (local $8 i32) + local.get $1 + global.get $~lib/runtime/MAX_BYTELENGTH + i32.gt_u + if + i32.const 0 + i32.const 80 + i32.const 152 + i32.const 44 + call $~lib/env/abort + unreachable + end local.get $0 local.set $2 local.get $0 local.get $1 i32.add local.set $3 + local.get $3 + local.get $2 + i32.ge_u + i32.eqz + if + i32.const 0 + i32.const 80 + i32.const 155 + i32.const 4 + call $~lib/env/abort + unreachable + end block $~lib/runtime/ALLOCATE|inlined.2 (result i32) local.get $1 i32.const 1 @@ -3174,7 +3215,7 @@ call $~lib/runtime/register end ) - (func $~lib/encoding/UTF8.decode (; 21 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) + (func $~lib/encoding/UTF8Decoder.decode (; 21 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) local.get $1 i32.const 0 i32.ne @@ -3182,12 +3223,12 @@ local.get $0 local.get $0 call $~lib/arraybuffer/ArrayBuffer#get:byteLength - call $~lib/encoding/UTF8.decodeRawDelimited + call $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe else local.get $0 local.get $0 call $~lib/arraybuffer/ArrayBuffer#get:byteLength - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe end ) (func $~lib/util/string/compareImpl (; 22 ;) (type $FUNCSIG$iiiiii) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) (result i32) @@ -3291,11 +3332,11 @@ (local $0 i32) global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode local.set $0 local.get $0 i32.const 0 - call $~lib/encoding/UTF8.decode + call $~lib/encoding/UTF8Decoder.decode global.get $std/encoding/str call $~lib/string/String.__eq i32.eqz @@ -3308,15 +3349,15 @@ unreachable end ) - (func $std/encoding/testUTF8DecodeDelimited (; 25 ;) (type $FUNCSIG$v) + (func $std/encoding/testUTF8DecodeNullTerminated (; 25 ;) (type $FUNCSIG$v) (local $0 i32) global.get $std/encoding/str i32.const 1 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode local.set $0 local.get $0 i32.const 1 - call $~lib/encoding/UTF8.decode + call $~lib/encoding/UTF8Decoder.decode global.get $std/encoding/str call $~lib/string/String.__eq i32.eqz @@ -3329,15 +3370,15 @@ unreachable end ) - (func $std/encoding/testUTF8Raw (; 26 ;) (type $FUNCSIG$v) + (func $std/encoding/testUTF8DecodeUnsafe (; 26 ;) (type $FUNCSIG$v) (local $0 i32) global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.encode + call $~lib/encoding/UTF8Encoder.encode local.set $0 local.get $0 i32.const 0 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 160 call $~lib/string/String.__eq i32.eqz @@ -3352,8 +3393,8 @@ local.get $0 global.get $std/encoding/str i32.const 0 - call $~lib/encoding/UTF8.length - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Encoder.byteLength + call $~lib/encoding/UTF8Decoder.decodeUnsafe global.get $std/encoding/str call $~lib/string/String.__eq i32.eqz @@ -3367,7 +3408,7 @@ end local.get $0 i32.const 4 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 168 call $~lib/string/String.__eq i32.eqz @@ -3383,7 +3424,7 @@ i32.const 4 i32.add i32.const 2 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 184 call $~lib/string/String.__eq i32.eqz @@ -3399,7 +3440,7 @@ i32.const 6 i32.add i32.const 4 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 200 call $~lib/string/String.__eq i32.eqz @@ -3415,7 +3456,7 @@ i32.const 10 i32.add i32.const 0 - call $~lib/encoding/UTF8.decodeRaw + call $~lib/encoding/UTF8Decoder.decodeUnsafe i32.const 160 call $~lib/string/String.__eq i32.eqz @@ -3434,7 +3475,7 @@ i32.const 4 i32.add global.get $~lib/runtime/MAX_BYTELENGTH - call $~lib/encoding/UTF8.decodeRawDelimited + call $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe i32.const 216 call $~lib/string/String.__eq i32.eqz @@ -3450,7 +3491,7 @@ i32.const 6 i32.add global.get $~lib/runtime/MAX_BYTELENGTH - call $~lib/encoding/UTF8.decodeRawDelimited + call $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe i32.const 200 call $~lib/string/String.__eq i32.eqz @@ -3466,7 +3507,7 @@ i32.const 10 i32.add global.get $~lib/runtime/MAX_BYTELENGTH - call $~lib/encoding/UTF8.decodeRawDelimited + call $~lib/encoding/UTF8Decoder.decodeNullTerminatedUnsafe i32.const 160 call $~lib/string/String.__eq i32.eqz @@ -3492,10 +3533,10 @@ global.get $~lib/allocator/arena/startOffset global.set $~lib/allocator/arena/offset call $std/encoding/testUTF8Encode - call $std/encoding/testUTF8EncodeDelimited + call $std/encoding/testUTF8EncodeNullTerminated call $std/encoding/testUTF8Decode - call $std/encoding/testUTF8DecodeDelimited - call $std/encoding/testUTF8Raw + call $std/encoding/testUTF8DecodeNullTerminated + call $std/encoding/testUTF8DecodeUnsafe ) (func $start (; 28 ;) (type $FUNCSIG$v) call $start:std/encoding diff --git a/tests/compiler/std/string.optimized.wat b/tests/compiler/std/string.optimized.wat index a3b17e9b..c8479277 100644 --- a/tests/compiler/std/string.optimized.wat +++ b/tests/compiler/std/string.optimized.wat @@ -575,7 +575,7 @@ if i32.const 0 i32.const 216 - i32.const 25 + i32.const 24 i32.const 4 call $~lib/env/abort unreachable @@ -630,7 +630,7 @@ if i32.const 0 i32.const 216 - i32.const 165 + i32.const 164 i32.const 4 call $~lib/env/abort unreachable @@ -678,7 +678,7 @@ if i32.const 0 i32.const 216 - i32.const 78 + i32.const 77 i32.const 4 call $~lib/env/abort unreachable @@ -726,7 +726,7 @@ if i32.const 0 i32.const 216 - i32.const 134 + i32.const 133 i32.const 4 call $~lib/env/abort unreachable @@ -1877,7 +1877,7 @@ if i32.const 0 i32.const 216 - i32.const 282 + i32.const 281 i32.const 4 call $~lib/env/abort unreachable @@ -1972,7 +1972,7 @@ if i32.const 0 i32.const 216 - i32.const 303 + i32.const 302 i32.const 4 call $~lib/env/abort unreachable @@ -2068,7 +2068,7 @@ if i32.const 0 i32.const 216 - i32.const 150 + i32.const 149 i32.const 4 call $~lib/env/abort unreachable @@ -2490,7 +2490,7 @@ if i32.const 0 i32.const 216 - i32.const 464 + i32.const 463 i32.const 10 call $~lib/env/abort unreachable @@ -2762,7 +2762,7 @@ if i32.const 0 i32.const 216 - i32.const 324 + i32.const 323 i32.const 4 call $~lib/env/abort unreachable @@ -2792,7 +2792,7 @@ if i32.const 0 i32.const 216 - i32.const 329 + i32.const 328 i32.const 6 call $~lib/env/abort unreachable @@ -3354,7 +3354,7 @@ if i32.const 0 i32.const 216 - i32.const 351 + i32.const 350 i32.const 4 call $~lib/env/abort unreachable @@ -5072,7 +5072,7 @@ if i32.const 0 i32.const 216 - i32.const 190 + i32.const 189 i32.const 4 call $~lib/env/abort unreachable diff --git a/tests/compiler/std/string.untouched.wat b/tests/compiler/std/string.untouched.wat index 91eebc92..8fa9e205 100644 --- a/tests/compiler/std/string.untouched.wat +++ b/tests/compiler/std/string.untouched.wat @@ -524,7 +524,7 @@ if i32.const 0 i32.const 216 - i32.const 25 + i32.const 24 i32.const 4 call $~lib/env/abort unreachable @@ -597,7 +597,7 @@ if i32.const 0 i32.const 216 - i32.const 165 + i32.const 164 i32.const 4 call $~lib/env/abort unreachable @@ -663,7 +663,7 @@ if i32.const 0 i32.const 216 - i32.const 78 + i32.const 77 i32.const 4 call $~lib/env/abort unreachable @@ -727,7 +727,7 @@ if i32.const 0 i32.const 216 - i32.const 134 + i32.const 133 i32.const 4 call $~lib/env/abort unreachable @@ -2288,7 +2288,7 @@ if i32.const 0 i32.const 216 - i32.const 282 + i32.const 281 i32.const 4 call $~lib/env/abort unreachable @@ -2397,7 +2397,7 @@ if i32.const 0 i32.const 216 - i32.const 303 + i32.const 302 i32.const 4 call $~lib/env/abort unreachable @@ -2507,7 +2507,7 @@ if i32.const 0 i32.const 216 - i32.const 150 + i32.const 149 i32.const 4 call $~lib/env/abort unreachable @@ -3021,7 +3021,7 @@ if i32.const 0 i32.const 216 - i32.const 464 + i32.const 463 i32.const 10 call $~lib/env/abort unreachable @@ -3311,7 +3311,7 @@ if i32.const 0 i32.const 216 - i32.const 324 + i32.const 323 i32.const 4 call $~lib/env/abort unreachable @@ -3339,7 +3339,7 @@ if i32.const 0 i32.const 216 - i32.const 329 + i32.const 328 i32.const 6 call $~lib/env/abort unreachable @@ -4110,7 +4110,7 @@ if i32.const 0 i32.const 216 - i32.const 351 + i32.const 350 i32.const 4 call $~lib/env/abort unreachable @@ -6587,7 +6587,7 @@ if i32.const 0 i32.const 216 - i32.const 190 + i32.const 189 i32.const 4 call $~lib/env/abort unreachable