Add lightweight paths for memcpy & memset for shrink level != 0 (#601)

This commit is contained in:
Max Graey 2019-05-24 17:00:02 +03:00 committed by Daniel Wirtz
parent af00bdeefe
commit 7cd04b65ef
22 changed files with 26977 additions and 1492 deletions

View File

@ -1,154 +1,163 @@
// memcpy: copies `n` bytes from `src` to `dest`.
// When ASC_SHRINK_LEVEL > 1 a plain byte loop is used; otherwise the copy proceeds in
// 4-byte words where alignment allows, following the musl routine referenced below.
// export function memcpy(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memcpy.c export function memcpy(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memcpy.c
// var w: u32, x: u32; if (ASC_SHRINK_LEVEL > 1) {
// size-optimized path: one byte per iteration until n reaches 0
while (n) {
store<u8>(dest++, load<u8>(src++));
--n;
}
} else {
// default path: w and x hold the two most recently loaded source words for the shifted copy below
let w: u32, x: u32;
// // copy 1 byte each until src is aligned to 4 bytes // copy 1 byte each until src is aligned to 4 bytes
// while (n && (src & 3)) { while (n && (src & 3)) {
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// n--; n--;
// } }
// // if dst is aligned to 4 bytes as well, copy 4 bytes each // if dst is aligned to 4 bytes as well, copy 4 bytes each
// if ((dest & 3) == 0) { if ((dest & 3) == 0) {
// while (n >= 16) { while (n >= 16) {
// store<u32>(dest , load<u32>(src )); store<u32>(dest , load<u32>(src ));
// store<u32>(dest + 4, load<u32>(src + 4)); store<u32>(dest + 4, load<u32>(src + 4));
// store<u32>(dest + 8, load<u32>(src + 8)); store<u32>(dest + 8, load<u32>(src + 8));
// store<u32>(dest + 12, load<u32>(src + 12)); store<u32>(dest + 12, load<u32>(src + 12));
// src += 16; dest += 16; n -= 16; src += 16; dest += 16; n -= 16;
// } }
// if (n & 8) { if (n & 8) {
// store<u32>(dest , load<u32>(src )); store<u32>(dest , load<u32>(src ));
// store<u32>(dest + 4, load<u32>(src + 4)); store<u32>(dest + 4, load<u32>(src + 4));
// dest += 8; src += 8; dest += 8; src += 8;
// } }
// if (n & 4) { if (n & 4) {
// store<u32>(dest, load<u32>(src)); store<u32>(dest, load<u32>(src));
// dest += 4; src += 4; dest += 4; src += 4;
// } }
// if (n & 2) { // drop to 2 bytes each if (n & 2) { // drop to 2 bytes each
// store<u16>(dest, load<u16>(src)); store<u16>(dest, load<u16>(src));
// dest += 2; src += 2; dest += 2; src += 2;
// } }
// if (n & 1) { // drop to 1 byte if (n & 1) { // drop to 1 byte
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// } }
// return; return;
// } }
// // if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each // if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
// // doing shifts is faster when copying enough bytes (here: 32 or more) // doing shifts is faster when copying enough bytes (here: 32 or more)
// if (n >= 32) { if (n >= 32) {
// switch (dest & 3) { switch (dest & 3) {
// // known to be != 0 // known to be != 0
// case 1: { case 1: {
// dest is 1 byte past a 4-byte boundary: three single-byte copies align it, then each
// 4-byte store merges the top byte of one source word with the low 3 bytes of the next
// w = load<u32>(src); w = load<u32>(src);
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// n -= 3; n -= 3;
// while (n >= 17) { while (n >= 17) {
// x = load<u32>(src + 1); x = load<u32>(src + 1);
// store<u32>(dest, w >> 24 | x << 8); store<u32>(dest, w >> 24 | x << 8);
// w = load<u32>(src + 5); w = load<u32>(src + 5);
// store<u32>(dest + 4, x >> 24 | w << 8); store<u32>(dest + 4, x >> 24 | w << 8);
// x = load<u32>(src + 9); x = load<u32>(src + 9);
// store<u32>(dest + 8, w >> 24 | x << 8); store<u32>(dest + 8, w >> 24 | x << 8);
// w = load<u32>(src + 13); w = load<u32>(src + 13);
// store<u32>(dest + 12, x >> 24 | w << 8); store<u32>(dest + 12, x >> 24 | w << 8);
// src += 16; dest += 16; n -= 16; src += 16; dest += 16; n -= 16;
// } }
// break; break;
// } }
// case 2: { case 2: {
// dest is 2 bytes past a 4-byte boundary: two single-byte copies align it, then each
// 4-byte store merges the upper half of one source word with the lower half of the next
// w = load<u32>(src); w = load<u32>(src);
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// n -= 2; n -= 2;
// while (n >= 18) { while (n >= 18) {
// x = load<u32>(src + 2); x = load<u32>(src + 2);
// store<u32>(dest, w >> 16 | x << 16); store<u32>(dest, w >> 16 | x << 16);
// w = load<u32>(src + 6); w = load<u32>(src + 6);
// store<u32>(dest + 4, x >> 16 | w << 16); store<u32>(dest + 4, x >> 16 | w << 16);
// x = load<u32>(src + 10); x = load<u32>(src + 10);
// store<u32>(dest + 8, w >> 16 | x << 16); store<u32>(dest + 8, w >> 16 | x << 16);
// w = load<u32>(src + 14); w = load<u32>(src + 14);
// store<u32>(dest + 12, x >> 16 | w << 16); store<u32>(dest + 12, x >> 16 | w << 16);
// src += 16; dest += 16; n -= 16; src += 16; dest += 16; n -= 16;
// } }
// break; break;
// } }
// case 3: { case 3: {
// dest is 3 bytes past a 4-byte boundary: one single-byte copy aligns it, then each
// 4-byte store merges the top 3 bytes of one source word with the low byte of the next
// w = load<u32>(src); w = load<u32>(src);
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// n -= 1; n -= 1;
// while (n >= 19) { while (n >= 19) {
// x = load<u32>(src + 3); x = load<u32>(src + 3);
// store<u32>(dest, w >> 8 | x << 24); store<u32>(dest, w >> 8 | x << 24);
// w = load<u32>(src + 7); w = load<u32>(src + 7);
// store<u32>(dest + 4, x >> 8 | w << 24); store<u32>(dest + 4, x >> 8 | w << 24);
// x = load<u32>(src + 11); x = load<u32>(src + 11);
// store<u32>(dest + 8, w >> 8 | x << 24); store<u32>(dest + 8, w >> 8 | x << 24);
// w = load<u32>(src + 15); w = load<u32>(src + 15);
// store<u32>(dest + 12, x >> 8 | w << 24); store<u32>(dest + 12, x >> 8 | w << 24);
// src += 16; dest += 16; n -= 16; src += 16; dest += 16; n -= 16;
// } }
// break; break;
// } }
// } }
// } }
// // copy remaining bytes one by one // copy remaining bytes one by one
// if (n & 16) { if (n & 16) {
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// } }
// if (n & 8) { if (n & 8) {
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// } }
// if (n & 4) { if (n & 4) {
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// } }
// if (n & 2) { if (n & 2) {
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// } }
// if (n & 1) { if (n & 1) {
// store<u8>(dest++, load<u8>(src++)); store<u8>(dest++, load<u8>(src++));
// } }
// } }
}
// @ts-ignore: decorator // @ts-ignore: decorator
@inline @inline
export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memmove.c export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memmove.c
if (dest === src) return; if (dest === src) return;
// if (src + n <= dest || dest + n <= src) { if (ASC_SHRINK_LEVEL < 1) {
// memcpy(dest, src, n); if (src + n <= dest || dest + n <= src) {
// return; memcpy(dest, src, n);
// } return;
}
}
if (dest < src) { if (dest < src) {
if ((src & 7) == (dest & 7)) { if ((src & 7) == (dest & 7)) {
while (dest & 7) { while (dest & 7) {
@ -187,7 +196,12 @@ export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/
// @ts-ignore: decorator // @ts-ignore: decorator
@inline @inline
export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/string/memset export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/string/memset
if (ASC_SHRINK_LEVEL > 1) {
while (n) {
store<u8>(dest++, c);
--n;
}
} else {
// fill head and tail with minimal branching // fill head and tail with minimal branching
if (!n) return; if (!n) return;
store<u8>(dest, c); store<u8>(dest, c);
@ -204,12 +218,12 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
if (n <= 8) return; if (n <= 8) return;
// advance pointer to align it at 4-byte boundary // advance pointer to align it at 4-byte boundary
var k: usize = -dest & 3; let k: usize = -dest & 3;
dest += k; dest += k;
n -= k; n -= k;
n &= -4; n &= -4;
var c32: u32 = <u32>-1 / 255 * c; let c32: u32 = <u32>-1 / 255 * c;
// fill head/tail up to 28 bytes each in preparation // fill head/tail up to 28 bytes each in preparation
store<u32>(dest, c32); store<u32>(dest, c32);
@ -235,7 +249,7 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
n -= k; n -= k;
// copy 32 bytes each // copy 32 bytes each
var c64: u64 = <u64>c32 | (<u64>c32 << 32); let c64: u64 = <u64>c32 | (<u64>c32 << 32);
while (n >= 32) { while (n >= 32) {
store<u64>(dest, c64); store<u64>(dest, c64);
store<u64>(dest + 8, c64); store<u64>(dest + 8, c64);
@ -244,6 +258,7 @@ export function memset(dest: usize, c: u8, n: usize): void { // see: musl/src/st
n -= 32; n -= 32;
dest += 32; dest += 32;
} }
}
} }
// @ts-ignore: decorator // @ts-ignore: decorator

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff