Chunk data sections even smaller per #18 and update README explanation

2025-07-01 23:41:35 +00:00 · 2018-07-26 00:27:17 -05:00
parent 1127b61eb5
commit 73862e9bc9
3 changed files with 14 additions and 9 deletions
--- a/README.md
+++ b/README.md
@ -374,8 +374,9 @@ and the JVM:
 * WebAssembly has a nice data section for byte arrays whereas the JVM does not. Right now we use a single-byte-char
  string constant (i.e. ISO-8859 charset). This saves class file size, but this means we call `String::getBytes` on
  init to load bytes from the string constant. Due to the JVM using an unsigned 16-bit int as the string constant
-  length, the maximum length is 65536, so we chunk data sections into as many max-65500-byte lengths we need to load it
-  all.
+  length, the maximum byte length is 65535. Since the string constants are stored as UTF-8 constants, they can be up to
+  four bytes a character. Therefore, we populate memory in data chunks no larger than 16300 bytes (a nice round number
+  that ensures that even in the worst case of 4 bytes per char in the UTF-8 view, we're still under the max).
 * The JVM makes no guarantees about trailing bits being preserved on NaN floating point representations like WebAssembly
  does. This causes some mismatch on WebAssembly tests depending on how the JVM "feels" (I haven't dug into why some
  bit patterns stay and some don't when NaNs are passed through methods).
--- a/compiler/src/main/kotlin/asmble/compile/jvm/ByteBufferMem.kt
+++ b/compiler/src/main/kotlin/asmble/compile/jvm/ByteBufferMem.kt
@ -49,8 +49,10 @@ open class ByteBufferMem(val direct: Boolean = true) : Mem {
                TypeInsnNode(Opcodes.CHECKCAST, memType.asmName)
            ).addInsns(
                // We're going to do this as an LDC string in ISO-8859 and read it back at runtime. However,
-                // due to JVM limits, we can't have a string > 65536 chars, so I'll chunk it every 65500 chars.
-                bytes.chunked(65500).flatMap { bytes ->
+                // due to JVM limits, we can't have a string constant > 65535 bytes. We chunk into 16300 because
+                // when converting to a UTF-8 const it can be up to 4 bytes per char, so this makes sure it doesn't
+                // overflow.
+                bytes.chunked(16300).flatMap { bytes ->
                    sequenceOf(
                        LdcInsnNode(bytes.toString(Charsets.ISO_8859_1)),
                        LdcInsnNode("ISO-8859-1"),
--- a/compiler/src/test/kotlin/asmble/compile/jvm/LargeDataTest.kt
+++ b/compiler/src/test/kotlin/asmble/compile/jvm/LargeDataTest.kt
@ -12,7 +12,9 @@ import kotlin.test.assertEquals
 class LargeDataTest : TestBase() {
    @Test
    fun testLargeData() {
-        // This previously failed because string constants can't be longer than 65536 chars
+        // This previously failed because string constants can't be longer than 65536 chars.
+        // We create a byte array across the whole gamut of bytes to test UTF8 encoding.
+        val bytesExpected = ByteArray(70000) { ((it % 255) - Byte.MIN_VALUE).toByte() }
        val mod = Node.Module(
            memories = listOf(Node.Type.Memory(
                limits = Node.ResizableLimits(initial = 2, maximum = 2)
@ -20,7 +22,7 @@ class LargeDataTest : TestBase() {
            data = listOf(Node.Data(
                index = 0,
                offset = listOf(Node.Instr.I32Const(0)),
-                data = ByteArray(70000) { 'a'.toByte() }
+                data = bytesExpected
            ))
        )
        val ctx = ClsContext(
@ -35,9 +37,9 @@ class LargeDataTest : TestBase() {
        val field = cls.getDeclaredField("memory").apply { isAccessible = true }
        val buf = field[cls.newInstance()] as ByteBuffer
        // Grab all + 1 and check values
-        val bytes = ByteArray(70001).also { buf.get(0, it) }
-        bytes.forEachIndexed { index, byte ->
-            assertEquals(if (index == 70000) 0.toByte() else 'a'.toByte(), byte)
+        val bytesActual = ByteArray(70001).also { buf.get(0, it) }
+        bytesActual.forEachIndexed { index, byte ->
+            assertEquals(if (index == 70000) 0.toByte() else bytesExpected[index], byte)
        }
    }
 }