mirror of
https://github.com/fluencelabs/asmble
synced 2025-04-24 14:22:20 +00:00
Chunk data sections even smaller per #18 and update README explanation
This commit is contained in:
parent
1127b61eb5
commit
73862e9bc9
@ -374,8 +374,9 @@ and the JVM:
|
||||
* WebAssembly has a nice data section for byte arrays whereas the JVM does not. Right now we use a single-byte-char
|
||||
string constant (i.e. ISO-8859 charset). This saves class file size, but this means we call `String::getBytes` on
|
||||
init to load bytes from the string constant. Due to the JVM using an unsigned 16-bit int as the string constant
|
||||
length, the maximum length is 65536, so we chunk data sections into as many max-65500-byte lengths we need to load it
|
||||
all.
|
||||
length, the maximum byte length is 65535. Since the string constants are stored as UTF-8 constants, they can be up to
|
||||
four bytes a character. Therefore, we populate memory in data chunks no larger than 16300 (nice round number to make
|
||||
sure that even in the worst case of 4 bytes per char in UTF-8 view, we're still under the max).
|
||||
* The JVM makes no guarantees about trailing bits being preserved on NaN floating point representations like WebAssembly
|
||||
does. This causes some mismatch on WebAssembly tests depending on how the JVM "feels" (I haven't dug into why some
|
||||
bit patterns stay and some don't when NaNs are passed through methods).
|
||||
|
@ -49,8 +49,10 @@ open class ByteBufferMem(val direct: Boolean = true) : Mem {
|
||||
TypeInsnNode(Opcodes.CHECKCAST, memType.asmName)
|
||||
).addInsns(
|
||||
// We're going to do this as an LDC string in ISO-8859 and read it back at runtime. However,
|
||||
// due to JVM limits, we can't have a string > 65536 chars, so I'll chunk it every 65500 chars.
|
||||
bytes.chunked(65500).flatMap { bytes ->
|
||||
// due to JVM limits, we can't have a string > 65536 chars. We chunk into 16300 because when
|
||||
// converting to UTF8 const it can be up to 4 bytes per char, so this makes sure it doesn't
|
||||
// overflow.
|
||||
bytes.chunked(16300).flatMap { bytes ->
|
||||
sequenceOf(
|
||||
LdcInsnNode(bytes.toString(Charsets.ISO_8859_1)),
|
||||
LdcInsnNode("ISO-8859-1"),
|
||||
|
@ -12,7 +12,9 @@ import kotlin.test.assertEquals
|
||||
class LargeDataTest : TestBase() {
|
||||
@Test
|
||||
fun testLargeData() {
|
||||
// This previously failed because string constants can't be longer than 65536 chars
|
||||
// This previously failed because string constants can't be longer than 65536 chars.
|
||||
// We create a byte array across the whole gamut of bytes to test UTF8 encoding.
|
||||
val bytesExpected = ByteArray(70000) { ((it % 255) - Byte.MIN_VALUE).toByte() }
|
||||
val mod = Node.Module(
|
||||
memories = listOf(Node.Type.Memory(
|
||||
limits = Node.ResizableLimits(initial = 2, maximum = 2)
|
||||
@ -20,7 +22,7 @@ class LargeDataTest : TestBase() {
|
||||
data = listOf(Node.Data(
|
||||
index = 0,
|
||||
offset = listOf(Node.Instr.I32Const(0)),
|
||||
data = ByteArray(70000) { 'a'.toByte() }
|
||||
data = bytesExpected
|
||||
))
|
||||
)
|
||||
val ctx = ClsContext(
|
||||
@ -35,9 +37,9 @@ class LargeDataTest : TestBase() {
|
||||
val field = cls.getDeclaredField("memory").apply { isAccessible = true }
|
||||
val buf = field[cls.newInstance()] as ByteBuffer
|
||||
// Grab all + 1 and check values
|
||||
val bytes = ByteArray(70001).also { buf.get(0, it) }
|
||||
bytes.forEachIndexed { index, byte ->
|
||||
assertEquals(if (index == 70000) 0.toByte() else 'a'.toByte(), byte)
|
||||
val bytesActual = ByteArray(70001).also { buf.get(0, it) }
|
||||
bytesActual.forEachIndexed { index, byte ->
|
||||
assertEquals(if (index == 70000) 0.toByte() else bytesExpected[index], byte)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user