diff --git a/crates/cli-support/src/js/mod.rs b/crates/cli-support/src/js/mod.rs
index f5dade92..10946869 100644
--- a/crates/cli-support/src/js/mod.rs
+++ b/crates/cli-support/src/js/mod.rs
@@ -1445,18 +1445,48 @@ impl<'a> Context<'a> {
         self.expose_text_encoder();
         self.expose_uint8_memory();
 
+        // A fast path that directly writes char codes into WASM memory as long
+        // as it finds only ASCII characters.
+        //
+        // This is much faster for common ASCII strings because it can avoid
+        // calling out into C++ TextEncoder code.
+        //
+        // This might not be very intuitive, but such calls are usually more
+        // expensive in mainstream engines than staying in the JS, and
+        // charCodeAt on ASCII strings is usually optimised to raw bytes.
+        let start_encoding_as_ascii = format!(
+            "
+                {}
+                let size = arg.length;
+                let ptr = wasm.__wbindgen_malloc(size);
+                let offset = 0;
+                {{
+                    const mem = getUint8Memory();
+                    for (; offset < arg.length; offset++) {{
+                        const code = arg.charCodeAt(offset);
+                        if (code > 0x7F) break;
+                        mem[ptr + offset] = code;
+                    }}
+                }}
+            ",
+            debug
+        );
+
         // The first implementation we have for this is to use
         // `TextEncoder#encode` which has been around for quite some time.
         let use_encode = format!(
             "
                 {}
-                const buf = cachedTextEncoder.encode(arg);
-                const ptr = wasm.__wbindgen_malloc(buf.length);
-                getUint8Memory().set(buf, ptr);
-                WASM_VECTOR_LEN = buf.length;
+                if (offset !== arg.length) {{
+                    const buf = cachedTextEncoder.encode(arg.slice(offset));
+                    ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length);
+                    getUint8Memory().set(buf, ptr + offset);
+                    offset += buf.length;
+                }}
+                WASM_VECTOR_LEN = offset;
                 return ptr;
             ",
-            debug
+            start_encoding_as_ascii
         );
 
         // Another possibility is to use `TextEncoder#encodeInto` which is much
@@ -1465,23 +1495,26 @@ impl<'a> Context<'a> {
         let use_encode_into = format!(
             "
                 {}
-                let size = arg.length;
-                let ptr = wasm.__wbindgen_malloc(size);
-                let writeOffset = 0;
-                while (true) {{
-                    const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size);
-                    const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view);
-                    writeOffset += written;
-                    if (read === arg.length) {{
-                        break;
-                    }}
-                    arg = arg.substring(read);
-                    ptr = wasm.__wbindgen_realloc(ptr, size, size += arg.length * 3);
+                if (offset !== arg.length) {{
+                    arg = arg.slice(offset);
+                    ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3);
+                    const view = getUint8Memory().subarray(ptr + offset, ptr + size);
+                    const ret = cachedTextEncoder.encodeInto(arg, view);
+                    {}
+                    offset += ret.written;
                 }}
-                WASM_VECTOR_LEN = writeOffset;
+                WASM_VECTOR_LEN = offset;
                 return ptr;
             ",
-            debug
+            start_encoding_as_ascii,
+            // Debug builds only: check that the single `encodeInto` call
+            // consumed the whole remaining string. The buffer was grown to
+            // `arg.length * 3` bytes beforehand, so a short read is unexpected.
+            if self.config.debug {
+                "if (ret.read !== arg.length) throw new Error('failed to pass whole string');"
+            } else {
+                ""
+            },
         );
 
         // Looks like `encodeInto` doesn't currently work when the memory passed
diff --git a/tests/headless/main.rs b/tests/headless/main.rs
index fcc3ac6c..88bffc75 100755
--- a/tests/headless/main.rs
+++ b/tests/headless/main.rs
@@ -50,3 +50,4 @@ pub fn import_export_same_name() {
 pub mod snippets;
 pub mod modules;
 pub mod anyref_heap_live_count;
+pub mod strings;
diff --git a/tests/headless/strings.js b/tests/headless/strings.js
new file mode 100644
index 00000000..f22b994b
--- /dev/null
+++ b/tests/headless/strings.js
@@ -0,0 +1,15 @@
+export function test_string_roundtrip(f) {
+    const test = expected => {
+        const actual = f(expected);
+        if (actual === expected)
+            return;
+        throw new Error(`string roundtrip "${actual}" != "${expected}"`);
+    };
+
+    test('');
+    test('a');
+    test('💖');
+
+    test('a longer string');
+    test('a longer 💖 string');
+}
diff --git a/tests/headless/strings.rs b/tests/headless/strings.rs
new file mode 100644
index 00000000..1c752d75
--- /dev/null
+++ b/tests/headless/strings.rs
@@ -0,0 +1,12 @@
+use wasm_bindgen::prelude::*;
+use wasm_bindgen_test::*;
+
+#[wasm_bindgen(module = "/tests/headless/strings.js")]
+extern "C" {
+    fn test_string_roundtrip(c: &Closure<dyn Fn(String) -> String>);
+}
+
+#[wasm_bindgen_test]
+fn string_roundtrip() {
+    test_string_roundtrip(&Closure::wrap(Box::new(|s| s)));
+}
diff --git a/tests/wasm/simple.js b/tests/wasm/simple.js
index c9b1ba64..5ba8b0e5 100644
--- a/tests/wasm/simple.js
+++ b/tests/wasm/simple.js
@@ -92,3 +92,16 @@ exports.RenamedInRust = class {};
 exports.new_renamed = () => new exports.RenamedInRust;
 
 exports.import_export_same_name = () => {};
+
+exports.test_string_roundtrip = () => {
+    const test = s => {
+        assert.strictEqual(wasm.do_string_roundtrip(s), s);
+    };
+
+    test('');
+    test('a');
+    test('💖');
+
+    test('a longer string');
+    test('a longer 💖 string');
+};
diff --git a/tests/wasm/simple.rs b/tests/wasm/simple.rs
index b9f89df0..814cbcc0 100644
--- a/tests/wasm/simple.rs
+++ b/tests/wasm/simple.rs
@@ -27,6 +27,8 @@ extern "C" {
     #[wasm_bindgen(js_name = RenamedInRust)]
     type Renamed;
     fn new_renamed() -> Renamed;
+
+    fn test_string_roundtrip();
 }
 
 #[wasm_bindgen_test]
@@ -201,3 +203,13 @@ fn renaming_imports_and_instanceof() {
 pub fn import_export_same_name() {
     js_import_export_same_name();
 }
+
+#[wasm_bindgen_test]
+fn string_roundtrip() {
+    test_string_roundtrip();
+}
+
+#[wasm_bindgen]
+pub fn do_string_roundtrip(s: String) -> String {
+    s
+}