169 lines
5.2 KiB
TypeScript

/**
 * Copies `n` bytes from address `src` to address `dest`.
 *
 * The copy runs in up to four phases:
 *  1. Single bytes are moved until `src` is a multiple of 4 (or `n` is used up).
 *  2. If `dest` is then a multiple of 4 too, whole 32-bit words are moved,
 *     16 bytes per iteration, followed by 8/4/2/1-byte steps picked by the
 *     low bits of `n`.
 *  3. Otherwise, if at least 32 bytes remain, a few leading bytes bring
 *     `dest` to a multiple of 4, after which consecutive source words are
 *     stitched together with shifts so that every store is one full word.
 *     The shift directions match little-endian loads and stores.
 *  4. Whatever is still left is moved byte by byte.
 *
 * @param dest Address of the first byte to write.
 * @param src  Address of the first byte to read.
 * @param n    Number of bytes to copy.
 * @returns The value `dest` had on entry.
 */
export function memcpy(dest: usize, src: usize, n: usize): usize {
  let origin = dest;
  let wordA: u32, wordB: u32;

  // Phase 1: byte moves until the source address is 4-byte aligned.
  while (n != 0 && (src & 3) != 0) {
    store<u8>(dest, load<u8>(src));
    dest++;
    src++;
    n--;
  }

  // Phase 2: source and destination are both 4-byte aligned.
  if ((dest & 3) == 0) {
    // Four word moves (16 bytes) per iteration.
    while (n >= 16) {
      store<u32>(dest, load<u32>(src));
      store<u32>(dest + 4, load<u32>(src + 4));
      store<u32>(dest + 8, load<u32>(src + 8));
      store<u32>(dest + 12, load<u32>(src + 12));
      dest += 16;
      src += 16;
      n -= 16;
    }
    // Fewer than 16 bytes remain; the low four bits of n say which steps
    // are still needed.
    if ((n & 8) != 0) {
      store<u32>(dest, load<u32>(src));
      store<u32>(dest + 4, load<u32>(src + 4));
      dest += 8;
      src += 8;
    }
    if ((n & 4) != 0) {
      store<u32>(dest, load<u32>(src));
      dest += 4;
      src += 4;
    }
    if ((n & 2) != 0) {
      // One 16-bit move.
      store<u16>(dest, load<u16>(src));
      dest += 2;
      src += 2;
    }
    if ((n & 1) != 0) {
      // Final single byte; the cursors are not needed afterwards.
      store<u8>(dest, load<u8>(src));
    }
    return origin;
  }

  // Phase 3: destination is misaligned. The shifting scheme is only used
  // when 32 or more bytes remain.
  if (n >= 32) {
    // The aligned case returned above, so this is 1, 2 or 3.
    let skew = dest & 3;
    if (skew == 1) {
      // Grab the first source word before touching memory, then move
      // 3 bytes so that dest becomes aligned.
      wordA = load<u32>(src);
      store<u8>(dest, load<u8>(src));
      store<u8>(dest + 1, load<u8>(src + 1));
      store<u8>(dest + 2, load<u8>(src + 2));
      dest += 3;
      src += 3;
      n -= 3;
      // Each pass reads up to byte src + 16, hence the 17-byte threshold.
      while (n >= 17) {
        wordB = load<u32>(src + 1);
        store<u32>(dest, (wordA >> 24) | (wordB << 8));
        wordA = load<u32>(src + 5);
        store<u32>(dest + 4, (wordB >> 24) | (wordA << 8));
        wordB = load<u32>(src + 9);
        store<u32>(dest + 8, (wordA >> 24) | (wordB << 8));
        wordA = load<u32>(src + 13);
        store<u32>(dest + 12, (wordB >> 24) | (wordA << 8));
        dest += 16;
        src += 16;
        n -= 16;
      }
    } else if (skew == 2) {
      // Same scheme with 2 leading bytes and 16-bit shifts.
      wordA = load<u32>(src);
      store<u8>(dest, load<u8>(src));
      store<u8>(dest + 1, load<u8>(src + 1));
      dest += 2;
      src += 2;
      n -= 2;
      // Each pass reads up to byte src + 17, hence the 18-byte threshold.
      while (n >= 18) {
        wordB = load<u32>(src + 2);
        store<u32>(dest, (wordA >> 16) | (wordB << 16));
        wordA = load<u32>(src + 6);
        store<u32>(dest + 4, (wordB >> 16) | (wordA << 16));
        wordB = load<u32>(src + 10);
        store<u32>(dest + 8, (wordA >> 16) | (wordB << 16));
        wordA = load<u32>(src + 14);
        store<u32>(dest + 12, (wordB >> 16) | (wordA << 16));
        dest += 16;
        src += 16;
        n -= 16;
      }
    } else if (skew == 3) {
      // Same scheme with 1 leading byte and 8/24-bit shifts.
      wordA = load<u32>(src);
      store<u8>(dest, load<u8>(src));
      dest += 1;
      src += 1;
      n -= 1;
      // Each pass reads up to byte src + 18, hence the 19-byte threshold.
      while (n >= 19) {
        wordB = load<u32>(src + 3);
        store<u32>(dest, (wordA >> 8) | (wordB << 24));
        wordA = load<u32>(src + 7);
        store<u32>(dest + 4, (wordB >> 8) | (wordA << 24));
        wordB = load<u32>(src + 11);
        store<u32>(dest + 8, (wordA >> 8) | (wordB << 24));
        wordA = load<u32>(src + 15);
        store<u32>(dest + 12, (wordB >> 8) | (wordA << 24));
        dest += 16;
        src += 16;
        n -= 16;
      }
    }
  }

  // Phase 4: move the rest one byte at a time. The count is the sum of the
  // 16, 8, 4, 2 and 1 bits of n, i.e. n & 31.
  for (let left = n & 31; left != 0; left--) {
    store<u8>(dest, load<u8>(src));
    dest++;
    src++;
  }
  return origin;
}
// Self-test for memcpy.
//
// The 32 bytes at [base, base + 32) are filled with four distinct 8-byte
// patterns so that every copied byte can be traced back to its source. The
// 64-bit constants in the assertions are written most-significant byte
// first, while the byte at the lowest address is the least-significant one.
const base: usize = 8;
store<u64>(base     , 0x1111111111111111);
store<u64>(base + 8 , 0x2222222222222222);
store<u64>(base + 16, 0x3333333333333333);
store<u64>(base + 24, 0x4444444444444444);

var dest: usize;

// 4 bytes from a 4-byte-aligned source to a destination that is off by one:
// too short for the shifting path, so the bytes go through the byte tail.
dest = memcpy(base + 1, base + 16, 4);
assert(dest == base + 1);
assert(load<u64>(base) == 0x1111113333333311);

// Copying the whole 32-byte region onto itself (both addresses aligned)
// must leave every word unchanged.
dest = memcpy(base, base, 32);
assert(dest == base);
assert(load<u64>(base) == 0x1111113333333311);
assert(load<u64>(base + 8) == 0x2222222222222222);
assert(load<u64>(base + 16) == 0x3333333333333333);
assert(load<u64>(base + 24) == 0x4444444444444444);

// 3 bytes into the top of the first word (destination misaligned).
dest = memcpy(base + 5, base + 28, 3);
// The return value was previously assigned but never checked for this call.
assert(dest == base + 5);
assert(load<u64>(base) == 0x4444443333333311);

// 15 bytes between aligned addresses: 15 = 8 + 4 + 2 + 1 exercises every
// tail step of the aligned path. The byte at base + 23 lies just past the
// copied range and must keep its old value (0x33).
dest = memcpy(base + 8, base + 16, 15);
// The return value was previously assigned but never checked for this call.
assert(dest == base + 8);
assert(load<u64>(base) == 0x4444443333333311);
assert(load<u64>(base + 8) == 0x3333333333333333);
assert(load<u64>(base + 16) == 0x3344444444444444);
assert(load<u64>(base + 24) == 0x4444444444444444);