Improve per-bit divmod loop for mod & rem (#307)

This commit is contained in:
Max Graey
2018-10-30 16:10:50 +02:00
committed by Daniel Wirtz
parent d864977a1a
commit d864bef1eb
7 changed files with 936 additions and 1079 deletions

View File

@ -24,12 +24,12 @@
(local $3 i64)
(local $4 i64)
(local $5 i64)
(local $6 i64)
(local $7 i32)
(local $6 i32)
(local $7 i64)
(local $8 i64)
(local $9 i64)
(block $folding-inner0
(set_local $3
(set_local $4
(i64.and
(i64.shr_u
(tee_local $2
@ -42,10 +42,10 @@
(i64.const 2047)
)
)
(set_local $6
(set_local $5
(i64.and
(i64.shr_u
(tee_local $5
(tee_local $3
(i64.reinterpret/f64
(get_local $1)
)
@ -63,11 +63,11 @@
)
(if
(i32.eqz
(tee_local $7
(tee_local $6
(i64.eq
(tee_local $4
(tee_local $7
(i64.shl
(get_local $5)
(get_local $3)
(i64.const 1)
)
)
@ -75,18 +75,18 @@
)
)
)
(set_local $7
(set_local $6
(i64.eq
(get_local $3)
(get_local $4)
(i64.const 2047)
)
)
)
(if
(i32.eqz
(get_local $7)
(get_local $6)
)
(set_local $7
(set_local $6
(f64.ne
(get_local $1)
(get_local $1)
@ -94,7 +94,7 @@
)
)
(if
(get_local $7)
(get_local $6)
(return
(f64.div
(tee_local $0
@ -115,13 +115,13 @@
(i64.const 1)
)
)
(get_local $4)
(get_local $7)
)
(block
(br_if $folding-inner0
(i64.eq
(get_local $9)
(get_local $4)
(get_local $7)
)
)
(return
@ -132,16 +132,16 @@
(set_local $2
(if (result i64)
(i64.eqz
(get_local $3)
(get_local $4)
)
(i64.shl
(get_local $2)
(i64.add
(i64.sub
(i64.const 0)
(tee_local $3
(tee_local $4
(i64.sub
(get_local $3)
(get_local $4)
(i64.clz
(i64.shl
(get_local $2)
@ -163,22 +163,22 @@
)
)
)
(set_local $5
(set_local $3
(if (result i64)
(i64.eqz
(get_local $6)
(get_local $5)
)
(i64.shl
(get_local $5)
(get_local $3)
(i64.add
(i64.sub
(i64.const 0)
(tee_local $6
(tee_local $5
(i64.sub
(get_local $6)
(get_local $5)
(i64.clz
(i64.shl
(get_local $5)
(get_local $3)
(i64.const 12)
)
)
@ -190,86 +190,80 @@
)
(i64.or
(i64.and
(get_local $5)
(get_local $3)
(i64.const 4503599627370495)
)
(i64.const 4503599627370496)
)
)
)
(block $break|0
(loop $repeat|0
(br_if $break|0
(i64.le_s
(get_local $3)
(get_local $6)
)
(loop $continue|0
(if
(i64.gt_s
(get_local $4)
(get_local $5)
)
(if
(i64.eqz
(i64.shr_u
(tee_local $4
(i64.sub
(block
(if
(i64.ge_u
(get_local $2)
(get_local $3)
)
(block
(br_if $folding-inner0
(i64.eq
(get_local $2)
(get_local $5)
(get_local $3)
)
)
(i64.const 63)
)
)
(block
(br_if $folding-inner0
(i64.eqz
(get_local $4)
(set_local $2
(i64.sub
(get_local $2)
(get_local $3)
)
)
)
(set_local $2
(get_local $4)
)
(set_local $2
(i64.shl
(get_local $2)
(i64.const 1)
)
)
)
(set_local $2
(i64.shl
(get_local $2)
(i64.const 1)
(set_local $4
(i64.sub
(get_local $4)
(i64.const 1)
)
)
(br $continue|0)
)
(set_local $3
(i64.sub
(get_local $3)
(i64.const 1)
)
)
(br $repeat|0)
)
)
(if
(i64.eqz
(i64.shr_u
(tee_local $4
(i64.sub
(get_local $2)
(get_local $5)
)
)
(i64.const 63)
)
(i64.ge_u
(get_local $2)
(get_local $3)
)
(block
(br_if $folding-inner0
(i64.eqz
(get_local $4)
(i64.eq
(get_local $2)
(get_local $3)
)
)
(set_local $2
(get_local $4)
(i64.sub
(get_local $2)
(get_local $3)
)
)
)
)
(set_local $2
(i64.shl
(get_local $2)
(tee_local $4
(tee_local $3
(i64.clz
(i64.shl
(get_local $2)
@ -285,10 +279,10 @@
(tee_local $2
(if (result i64)
(i64.gt_s
(tee_local $3
(tee_local $4
(i64.sub
(get_local $3)
(get_local $4)
(get_local $3)
)
)
(i64.const 0)
@ -299,7 +293,7 @@
(i64.const 4503599627370496)
)
(i64.shl
(get_local $3)
(get_local $4)
(i64.const 52)
)
)
@ -308,7 +302,7 @@
(i64.add
(i64.sub
(i64.const 0)
(get_local $3)
(get_local $4)
)
(i64.const 1)
)
@ -403,7 +397,7 @@
(local $7 i32)
(local $8 i32)
(block $folding-inner0
(set_local $4
(set_local $3
(i32.and
(i32.shr_u
(tee_local $2
@ -419,7 +413,7 @@
(set_local $6
(i32.and
(i32.shr_u
(tee_local $5
(tee_local $4
(i32.reinterpret/f32
(get_local $1)
)
@ -437,29 +431,29 @@
)
(if
(i32.eqz
(tee_local $3
(tee_local $5
(i32.eqz
(tee_local $7
(i32.shl
(get_local $5)
(get_local $4)
(i32.const 1)
)
)
)
)
)
(set_local $3
(set_local $5
(i32.eq
(get_local $4)
(get_local $3)
(i32.const 255)
)
)
)
(if
(i32.eqz
(get_local $3)
(get_local $5)
)
(set_local $3
(set_local $5
(f32.ne
(get_local $1)
(get_local $1)
@ -468,7 +462,7 @@
)
(if
(i32.and
(get_local $3)
(get_local $5)
(i32.const 1)
)
(return
@ -485,7 +479,7 @@
)
(if
(i32.le_u
(tee_local $3
(tee_local $5
(i32.shl
(get_local $2)
(i32.const 1)
@ -496,7 +490,7 @@
(block
(br_if $folding-inner0
(i32.eq
(get_local $3)
(get_local $5)
(get_local $7)
)
)
@ -507,7 +501,7 @@
)
(set_local $2
(if (result i32)
(get_local $4)
(get_local $3)
(i32.or
(i32.and
(get_local $2)
@ -519,9 +513,9 @@
(get_local $2)
(i32.sub
(i32.const 1)
(tee_local $4
(tee_local $3
(i32.sub
(get_local $4)
(get_local $3)
(i32.clz
(i32.shl
(get_local $2)
@ -534,18 +528,18 @@
)
)
)
(set_local $5
(set_local $4
(if (result i32)
(get_local $6)
(i32.or
(i32.and
(get_local $5)
(get_local $4)
(i32.const 8388607)
)
(i32.const 8388608)
)
(i32.shl
(get_local $5)
(get_local $4)
(i32.sub
(i32.const 1)
(tee_local $6
@ -553,7 +547,7 @@
(get_local $6)
(i32.clz
(i32.shl
(get_local $5)
(get_local $4)
(i32.const 9)
)
)
@ -563,79 +557,73 @@
)
)
)
(block $break|0
(loop $repeat|0
(br_if $break|0
(i32.le_s
(get_local $4)
(get_local $6)
)
(loop $continue|0
(if
(i32.gt_s
(get_local $3)
(get_local $6)
)
(if
(i32.eqz
(i32.shr_u
(tee_local $3
(i32.sub
(block
(if
(i32.ge_u
(get_local $2)
(get_local $4)
)
(block
(br_if $folding-inner0
(i32.eq
(get_local $2)
(get_local $5)
(get_local $4)
)
)
(i32.const 31)
)
)
(block
(br_if $folding-inner0
(i32.eqz
(get_local $3)
(set_local $2
(i32.sub
(get_local $2)
(get_local $4)
)
)
)
(set_local $2
(get_local $3)
)
(set_local $2
(i32.shl
(get_local $2)
(i32.const 1)
)
)
)
(set_local $2
(i32.shl
(get_local $2)
(i32.const 1)
(set_local $3
(i32.sub
(get_local $3)
(i32.const 1)
)
)
(br $continue|0)
)
(set_local $4
(i32.sub
(get_local $4)
(i32.const 1)
)
)
(br $repeat|0)
)
)
(if
(i32.eqz
(i32.shr_u
(tee_local $3
(i32.sub
(get_local $2)
(get_local $5)
)
)
(i32.const 31)
)
(i32.ge_u
(get_local $2)
(get_local $4)
)
(block
(br_if $folding-inner0
(i32.eqz
(get_local $3)
(i32.eq
(get_local $2)
(get_local $4)
)
)
(set_local $2
(get_local $3)
(i32.sub
(get_local $2)
(get_local $4)
)
)
)
)
(set_local $2
(i32.shl
(get_local $2)
(tee_local $3
(tee_local $5
(i32.clz
(i32.shl
(get_local $2)
@ -651,10 +639,10 @@
(tee_local $2
(if (result i32)
(i32.gt_s
(tee_local $4
(tee_local $3
(i32.sub
(get_local $4)
(get_local $3)
(get_local $5)
)
)
(i32.const 0)
@ -665,7 +653,7 @@
(i32.const 8388608)
)
(i32.shl
(get_local $4)
(get_local $3)
(i32.const 23)
)
)
@ -673,7 +661,7 @@
(get_local $2)
(i32.sub
(i32.const 1)
(get_local $4)
(get_local $3)
)
)
)