mirror of
https://github.com/fluencelabs/assemblyscript
synced 2025-04-28 08:22:15 +00:00
Add lightweight paths for memcpy & memset for shrink level != 0 (#601)
This commit is contained in:
parent
af00bdeefe
commit
7cd04b65ef
@ -1,154 +1,163 @@
|
|||||||
// export function memcpy(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memcpy.c
|
export function memcpy(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memcpy.c
|
||||||
// var w: u32, x: u32;
|
if (ASC_SHRINK_LEVEL > 1) {
|
||||||
|
while (n) {
|
||||||
|
store<u8>(dest++, load<u8>(src++));
|
||||||
|
--n;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let w: u32, x: u32;
|
||||||
|
|
||||||
// // copy 1 byte each until src is aligned to 4 bytes
|
// copy 1 byte each until src is aligned to 4 bytes
|
||||||
// while (n && (src & 3)) {
|
while (n && (src & 3)) {
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// n--;
|
n--;
|
||||||
// }
|
}
|
||||||
|
|
||||||
// // if dst is aligned to 4 bytes as well, copy 4 bytes each
|
// if dst is aligned to 4 bytes as well, copy 4 bytes each
|
||||||
// if ((dest & 3) == 0) {
|
if ((dest & 3) == 0) {
|
||||||
// while (n >= 16) {
|
while (n >= 16) {
|
||||||
// store<u32>(dest , load<u32>(src ));
|
store<u32>(dest , load<u32>(src ));
|
||||||
// store<u32>(dest + 4, load<u32>(src + 4));
|
store<u32>(dest + 4, load<u32>(src + 4));
|
||||||
// store<u32>(dest + 8, load<u32>(src + 8));
|
store<u32>(dest + 8, load<u32>(src + 8));
|
||||||
// store<u32>(dest + 12, load<u32>(src + 12));
|
store<u32>(dest + 12, load<u32>(src + 12));
|
||||||
// src += 16; dest += 16; n -= 16;
|
src += 16; dest += 16; n -= 16;
|
||||||
// }
|
}
|
||||||
// if (n & 8) {
|
if (n & 8) {
|
||||||
// store<u32>(dest , load<u32>(src ));
|
store<u32>(dest , load<u32>(src ));
|
||||||
// store<u32>(dest + 4, load<u32>(src + 4));
|
store<u32>(dest + 4, load<u32>(src + 4));
|
||||||
// dest += 8; src += 8;
|
dest += 8; src += 8;
|
||||||
// }
|
}
|
||||||
// if (n & 4) {
|
if (n & 4) {
|
||||||
// store<u32>(dest, load<u32>(src));
|
store<u32>(dest, load<u32>(src));
|
||||||
// dest += 4; src += 4;
|
dest += 4; src += 4;
|
||||||
// }
|
}
|
||||||
// if (n & 2) { // drop to 2 bytes each
|
if (n & 2) { // drop to 2 bytes each
|
||||||
// store<u16>(dest, load<u16>(src));
|
store<u16>(dest, load<u16>(src));
|
||||||
// dest += 2; src += 2;
|
dest += 2; src += 2;
|
||||||
// }
|
}
|
||||||
// if (n & 1) { // drop to 1 byte
|
if (n & 1) { // drop to 1 byte
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// }
|
}
|
||||||
// return;
|
return;
|
||||||
// }
|
}
|
||||||
|
|
||||||
// // if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
|
// if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
|
||||||
// // doing shifts is faster when copying enough bytes (here: 32 or more)
|
// doing shifts is faster when copying enough bytes (here: 32 or more)
|
||||||
// if (n >= 32) {
|
if (n >= 32) {
|
||||||
// switch (dest & 3) {
|
switch (dest & 3) {
|
||||||
// // known to be != 0
|
// known to be != 0
|
||||||
// case 1: {
|
case 1: {
|
||||||
// w = load<u32>(src);
|
w = load<u32>(src);
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// n -= 3;
|
n -= 3;
|
||||||
// while (n >= 17) {
|
while (n >= 17) {
|
||||||
// x = load<u32>(src + 1);
|
x = load<u32>(src + 1);
|
||||||
// store<u32>(dest, w >> 24 | x << 8);
|
store<u32>(dest, w >> 24 | x << 8);
|
||||||
// w = load<u32>(src + 5);
|
w = load<u32>(src + 5);
|
||||||
// store<u32>(dest + 4, x >> 24 | w << 8);
|
store<u32>(dest + 4, x >> 24 | w << 8);
|
||||||
// x = load<u32>(src + 9);
|
x = load<u32>(src + 9);
|
||||||
// store<u32>(dest + 8, w >> 24 | x << 8);
|
store<u32>(dest + 8, w >> 24 | x << 8);
|
||||||
// w = load<u32>(src + 13);
|
w = load<u32>(src + 13);
|
||||||
// store<u32>(dest + 12, x >> 24 | w << 8);
|
store<u32>(dest + 12, x >> 24 | w << 8);
|
||||||
// src += 16; dest += 16; n -= 16;
|
src += 16; dest += 16; n -= 16;
|
||||||
// }
|
}
|
||||||
// break;
|
break;
|
||||||
// }
|
}
|
||||||
// case 2: {
|
case 2: {
|
||||||
// w = load<u32>(src);
|
w = load<u32>(src);
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// n -= 2;
|
n -= 2;
|
||||||
// while (n >= 18) {
|
while (n >= 18) {
|
||||||
// x = load<u32>(src + 2);
|
x = load<u32>(src + 2);
|
||||||
// store<u32>(dest, w >> 16 | x << 16);
|
store<u32>(dest, w >> 16 | x << 16);
|
||||||
// w = load<u32>(src + 6);
|
w = load<u32>(src + 6);
|
||||||
// store<u32>(dest + 4, x >> 16 | w << 16);
|
store<u32>(dest + 4, x >> 16 | w << 16);
|
||||||
// x = load<u32>(src + 10);
|
x = load<u32>(src + 10);
|
||||||
// store<u32>(dest + 8, w >> 16 | x << 16);
|
store<u32>(dest + 8, w >> 16 | x << 16);
|
||||||
// w = load<u32>(src + 14);
|
w = load<u32>(src + 14);
|
||||||
// store<u32>(dest + 12, x >> 16 | w << 16);
|
store<u32>(dest + 12, x >> 16 | w << 16);
|
||||||
// src += 16; dest += 16; n -= 16;
|
src += 16; dest += 16; n -= 16;
|
||||||
// }
|
}
|
||||||
// break;
|
break;
|
||||||
// }
|
}
|
||||||
// case 3: {
|
case 3: {
|
||||||
// w = load<u32>(src);
|
w = load<u32>(src);
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// n -= 1;
|
n -= 1;
|
||||||
// while (n >= 19) {
|
while (n >= 19) {
|
||||||
// x = load<u32>(src + 3);
|
x = load<u32>(src + 3);
|
||||||
// store<u32>(dest, w >> 8 | x << 24);
|
store<u32>(dest, w >> 8 | x << 24);
|
||||||
// w = load<u32>(src + 7);
|
w = load<u32>(src + 7);
|
||||||
// store<u32>(dest + 4, x >> 8 | w << 24);
|
store<u32>(dest + 4, x >> 8 | w << 24);
|
||||||
// x = load<u32>(src + 11);
|
x = load<u32>(src + 11);
|
||||||
// store<u32>(dest + 8, w >> 8 | x << 24);
|
store<u32>(dest + 8, w >> 8 | x << 24);
|
||||||
// w = load<u32>(src + 15);
|
w = load<u32>(src + 15);
|
||||||
// store<u32>(dest + 12, x >> 8 | w << 24);
|
store<u32>(dest + 12, x >> 8 | w << 24);
|
||||||
// src += 16; dest += 16; n -= 16;
|
src += 16; dest += 16; n -= 16;
|
||||||
// }
|
}
|
||||||
// break;
|
break;
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
|
|
||||||
// // copy remaining bytes one by one
|
// copy remaining bytes one by one
|
||||||
// if (n & 16) {
|
if (n & 16) {
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// }
|
}
|
||||||
// if (n & 8) {
|
if (n & 8) {
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// }
|
}
|
||||||
// if (n & 4) {
|
if (n & 4) {
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// }
|
}
|
||||||
// if (n & 2) {
|
if (n & 2) {
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// }
|
}
|
||||||
// if (n & 1) {
|
if (n & 1) {
|
||||||
// store<u8>(dest++, load<u8>(src++));
|
store<u8>(dest++, load<u8>(src++));
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// @ts-ignore: decorator
|
// @ts-ignore: decorator
|
||||||
@inline
|
@inline
|
||||||
export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memmove.c
|
export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memmove.c
|
||||||
if (dest === src) return;
|
if (dest === src) return;
|
||||||
// if (src + n <= dest || dest + n <= src) {
|
if (ASC_SHRINK_LEVEL < 1) {
|
||||||
// memcpy(dest, src, n);
|
if (src + n <= dest || dest + n <= src) {
|
||||||
// return;
|
memcpy(dest, src, n);
|
||||||
// }
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (dest < src) {
|
if (dest < src) {
|
||||||
if ((src & 7) == (dest & 7)) {
|
if ((src & 7) == (dest & 7)) {
|
||||||
while (dest & 7) {
|
while (dest & 7) {
|
||||||
@ -187,7 +196,12 @@ export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/
|
|||||||
// @ts-ignore: decorator
|
// @ts-ignore: decorator
|
||||||
@inline
|
@inline
|
||||||
export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/string/memset
|
export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/string/memset
|
||||||
|
if (ASC_SHRINK_LEVEL > 1) {
|
||||||
|
while (n) {
|
||||||
|
store<u8>(dest++, c);
|
||||||
|
--n;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
// fill head and tail with minimal branching
|
// fill head and tail with minimal branching
|
||||||
if (!n) return;
|
if (!n) return;
|
||||||
store<u8>(dest, c);
|
store<u8>(dest, c);
|
||||||
@ -204,12 +218,12 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
|
|||||||
if (n <= 8) return;
|
if (n <= 8) return;
|
||||||
|
|
||||||
// advance pointer to align it at 4-byte boundary
|
// advance pointer to align it at 4-byte boundary
|
||||||
var k: usize = -dest & 3;
|
let k: usize = -dest & 3;
|
||||||
dest += k;
|
dest += k;
|
||||||
n -= k;
|
n -= k;
|
||||||
n &= -4;
|
n &= -4;
|
||||||
|
|
||||||
var c32: u32 = <u32>-1 / 255 * c;
|
let c32: u32 = <u32>-1 / 255 * c;
|
||||||
|
|
||||||
// fill head/tail up to 28 bytes each in preparation
|
// fill head/tail up to 28 bytes each in preparation
|
||||||
store<u32>(dest, c32);
|
store<u32>(dest, c32);
|
||||||
@ -235,7 +249,7 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
|
|||||||
n -= k;
|
n -= k;
|
||||||
|
|
||||||
// copy 32 bytes each
|
// copy 32 bytes each
|
||||||
var c64: u64 = <u64>c32 | (<u64>c32 << 32);
|
let c64: u64 = <u64>c32 | (<u64>c32 << 32);
|
||||||
while (n >= 32) {
|
while (n >= 32) {
|
||||||
store<u64>(dest, c64);
|
store<u64>(dest, c64);
|
||||||
store<u64>(dest + 8, c64);
|
store<u64>(dest + 8, c64);
|
||||||
@ -244,6 +258,7 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
|
|||||||
n -= 32;
|
n -= 32;
|
||||||
dest += 32;
|
dest += 32;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// @ts-ignore: decorator
|
// @ts-ignore: decorator
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user