Chunk data sections even smaller per #18 and update README explanation

This commit is contained in:
Chad Retz 2018-07-26 00:27:17 -05:00
parent 1127b61eb5
commit 73862e9bc9
3 changed files with 14 additions and 9 deletions

View File

@ -374,8 +374,9 @@ and the JVM:
* WebAssembly has a nice data section for byte arrays whereas the JVM does not. Right now we use a single-byte-char * WebAssembly has a nice data section for byte arrays whereas the JVM does not. Right now we use a single-byte-char
string constant (i.e. ISO-8859 charset). This saves class file size, but this means we call `String::getBytes` on string constant (i.e. ISO-8859 charset). This saves class file size, but this means we call `String::getBytes` on
init to load bytes from the string constant. Due to the JVM using an unsigned 16-bit int as the string constant init to load bytes from the string constant. Due to the JVM using an unsigned 16-bit int as the string constant
length, the maximum length is 65536, so we chunk data sections into as many max-65500-byte lengths we need to load it length, the maximum byte length is 65535. Since the string constants are stored as UTF-8 constants, they can be up to
all. four bytes a character. Therefore, we populate memory in data chunks no larger than 16300 (nice round number to make
sure that even in the worst case of 4 bytes per char in UTF-8 view, we're still under the max).
* The JVM makes no guarantees about trailing bits being preserved on NaN floating point representations like WebAssembly * The JVM makes no guarantees about trailing bits being preserved on NaN floating point representations like WebAssembly
does. This causes some mismatch on WebAssembly tests depending on how the JVM "feels" (I haven't dug into why some does. This causes some mismatch on WebAssembly tests depending on how the JVM "feels" (I haven't dug into why some
bit patterns stay and some don't when NaNs are passed through methods). bit patterns stay and some don't when NaNs are passed through methods).

View File

@ -49,8 +49,10 @@ open class ByteBufferMem(val direct: Boolean = true) : Mem {
TypeInsnNode(Opcodes.CHECKCAST, memType.asmName) TypeInsnNode(Opcodes.CHECKCAST, memType.asmName)
).addInsns( ).addInsns(
// We're going to do this as an LDC string in ISO-8859 and read it back at runtime. However, // We're going to do this as an LDC string in ISO-8859 and read it back at runtime. However,
// due to JVM limits, we can't have a string > 65536 chars, so I'll chunk it every 65500 chars. // due to JVM limits, we can't have a string > 65536 chars. We chunk into 16300 because when
bytes.chunked(65500).flatMap { bytes -> // converting to UTF8 const it can be up to 4 bytes per char, so this makes sure it doesn't
// overflow.
bytes.chunked(16300).flatMap { bytes ->
sequenceOf( sequenceOf(
LdcInsnNode(bytes.toString(Charsets.ISO_8859_1)), LdcInsnNode(bytes.toString(Charsets.ISO_8859_1)),
LdcInsnNode("ISO-8859-1"), LdcInsnNode("ISO-8859-1"),

View File

@ -12,7 +12,9 @@ import kotlin.test.assertEquals
class LargeDataTest : TestBase() { class LargeDataTest : TestBase() {
@Test @Test
fun testLargeData() { fun testLargeData() {
// This previously failed because string constants can't be longer than 65536 chars // This previously failed because string constants can't be longer than 65536 chars.
// We create a byte array across the whole gamut of bytes to test UTF8 encoding.
val bytesExpected = ByteArray(70000) { ((it % 255) - Byte.MIN_VALUE).toByte() }
val mod = Node.Module( val mod = Node.Module(
memories = listOf(Node.Type.Memory( memories = listOf(Node.Type.Memory(
limits = Node.ResizableLimits(initial = 2, maximum = 2) limits = Node.ResizableLimits(initial = 2, maximum = 2)
@ -20,7 +22,7 @@ class LargeDataTest : TestBase() {
data = listOf(Node.Data( data = listOf(Node.Data(
index = 0, index = 0,
offset = listOf(Node.Instr.I32Const(0)), offset = listOf(Node.Instr.I32Const(0)),
data = ByteArray(70000) { 'a'.toByte() } data = bytesExpected
)) ))
) )
val ctx = ClsContext( val ctx = ClsContext(
@ -35,9 +37,9 @@ class LargeDataTest : TestBase() {
val field = cls.getDeclaredField("memory").apply { isAccessible = true } val field = cls.getDeclaredField("memory").apply { isAccessible = true }
val buf = field[cls.newInstance()] as ByteBuffer val buf = field[cls.newInstance()] as ByteBuffer
// Grab all + 1 and check values // Grab all + 1 and check values
val bytes = ByteArray(70001).also { buf.get(0, it) } val bytesActual = ByteArray(70001).also { buf.get(0, it) }
bytes.forEachIndexed { index, byte -> bytesActual.forEachIndexed { index, byte ->
assertEquals(if (index == 70000) 0.toByte() else 'a'.toByte(), byte) assertEquals(if (index == 70000) 0.toByte() else bytesExpected[index], byte)
} }
} }
} }