Slim down indexed access as far as currently possible

Interestingly, the same code written as statements is significantly slower. See notes.
This commit is contained in:
dcodeIO
2018-04-23 23:57:15 +02:00
parent de98a19eb8
commit 63aa648ace
16 changed files with 964 additions and 1384 deletions

View File

@@ -2556,7 +2556,7 @@
(call $abort
(i32.const 0)
(i32.const 4)
(i32.const 75)
(i32.const 81)
(i32.const 41)
)
(unreachable)
@@ -2597,53 +2597,18 @@
(get_local $2)
)
)
;; Bounds-checked indexed read of one 32-bit element.
;;   $0 - address of the array object; its first i32 is loaded and used as the
;;        address of a second structure (presumably the backing buffer - confirm
;;        against the Array/ArrayBuffer layout in ~lib).
;;   $1 - element index, compared as an unsigned 32-bit value.
;; Returns the i32 stored at (buffer + 8 + index * 4).
;; Calls $abort and then traps when the index is out of range.
(func $~lib/array/Array<Body>#__get (; 10 ;) (type $iii) (param $0 i32) (param $1 i32) (result i32)
;; $2 caches the address loaded from the array object so it is read only once.
(local $2 i32)
(if
;; Out-of-range test: index >= (first i32 of buffer >> 2), unsigned.
;; NOTE(review): the first word looks like a byte length that is divided by 4
;; to obtain an element count - confirm.
(i32.ge_u
(get_local $1)
(i32.shr_u
(i32.load
(tee_local $2
(i32.load
(get_local $0)
)
)
)
(i32.const 2)
)
)
(block
;; NOTE(review): the four constants are presumably message pointer, file-name
;; pointer, line and column of the failing source assertion - confirm against
;; the $abort import signature.
(call $abort
(i32.const 0)
(i32.const 4)
(i32.const 64)
(i32.const 37)
)
(unreachable)
)
)
;; In range: load the element at buffer + (index << 2), skipping the 8 bytes
;; that precede the element data (static offset=8).
(i32.load offset=8
(i32.add
(get_local $2)
(i32.shl
(get_local $1)
(i32.const 2)
)
)
)
)
(func $assembly/index/NBodySystem#constructor (; 11 ;) (type $iii) (param $0 i32) (param $1 i32) (result i32)
(func $assembly/index/NBodySystem#constructor (; 10 ;) (type $iii) (param $0 i32) (param $1 i32) (result i32)
(local $2 i32)
(local $3 i32)
(local $4 f64)
(local $5 f64)
(local $5 i32)
(local $6 f64)
(local $7 f64)
(local $8 i32)
(set_local $8
(local $8 f64)
(local $9 i32)
(set_local $9
(i32.load offset=4
(tee_local $3
(tee_local $2
(get_local $1)
)
)
@@ -2651,37 +2616,51 @@
(loop $continue|0
(if
(i32.lt_s
(get_local $2)
(get_local $8)
(get_local $3)
(get_local $9)
)
(block
(set_local $4
(f64.load offset=48
(tee_local $3
(call $~lib/array/Array<Body>#__get
(get_local $1)
(get_local $2)
(tee_local $2
(if (result i32)
(i32.lt_u
(tee_local $2
(get_local $3)
)
(i32.shr_u
(i32.load
(tee_local $5
(i32.load
(tee_local $5
(get_local $1)
)
)
)
)
(i32.const 2)
)
)
(i32.load offset=8
(i32.add
(get_local $5)
(i32.shl
(get_local $2)
(i32.const 2)
)
)
)
(unreachable)
)
)
)
)
(set_local $5
(f64.add
(get_local $5)
(f64.mul
(f64.load offset=24
(get_local $3)
)
(get_local $4)
)
)
)
(set_local $6
(f64.add
(get_local $6)
(f64.mul
(f64.load offset=32
(get_local $3)
(f64.load offset=24
(get_local $2)
)
(get_local $4)
)
@@ -2691,16 +2670,27 @@
(f64.add
(get_local $7)
(f64.mul
(f64.load offset=40
(get_local $3)
(f64.load offset=32
(get_local $2)
)
(get_local $4)
)
)
)
(set_local $2
(set_local $8
(f64.add
(get_local $8)
(f64.mul
(f64.load offset=40
(get_local $2)
)
(get_local $4)
)
)
)
(set_local $3
(i32.add
(get_local $2)
(get_local $3)
(i32.const 1)
)
)
@@ -2714,7 +2704,7 @@
(get_local $0)
(block (result i32)
(i32.store
(tee_local $2
(tee_local $3
(call $~lib/allocator/arena/allocate_memory
(i32.const 4)
)
@@ -2722,7 +2712,7 @@
(i32.const 0)
)
(tee_local $0
(get_local $2)
(get_local $3)
)
)
)
@@ -2730,16 +2720,36 @@
)
(f64.store offset=24
(tee_local $1
(call $~lib/array/Array<Body>#__get
(i32.load
(get_local $0)
(if (result i32)
(i32.lt_u
(i32.const 0)
(i32.shr_u
(i32.load
(tee_local $1
(i32.load
(tee_local $1
(i32.load
(get_local $0)
)
)
)
)
)
(i32.const 2)
)
)
(i32.const 0)
(i32.load offset=8
(i32.add
(get_local $1)
(i32.const 0)
)
)
(unreachable)
)
)
(f64.div
(f64.neg
(get_local $5)
(get_local $6)
)
(f64.const 39.47841760435743)
)
@@ -2748,7 +2758,7 @@
(get_local $1)
(f64.div
(f64.neg
(get_local $6)
(get_local $7)
)
(f64.const 39.47841760435743)
)
@@ -2757,14 +2767,14 @@
(get_local $1)
(f64.div
(f64.neg
(get_local $7)
(get_local $8)
)
(f64.const 39.47841760435743)
)
)
(get_local $0)
)
(func $assembly/index/init (; 12 ;) (type $v)
(func $assembly/index/init (; 11 ;) (type $v)
(local $0 i32)
(set_global $assembly/index/system
(call $assembly/index/NBodySystem#constructor
@@ -2850,7 +2860,7 @@
)
)
)
(func $assembly/index/getBody (; 13 ;) (type $ii) (param $0 i32) (result i32)
(func $assembly/index/getBody (; 12 ;) (type $ii) (param $0 i32) (result i32)
(local $1 i32)
(if (result i32)
(i32.lt_u
@@ -2863,22 +2873,43 @@
)
)
)
(call $~lib/array/Array<Body>#__get
(get_local $1)
(get_local $0)
(if (result i32)
(i32.lt_u
(get_local $0)
(i32.shr_u
(i32.load
(tee_local $1
(i32.load
(get_local $1)
)
)
)
(i32.const 2)
)
)
(i32.load offset=8
(i32.add
(get_local $1)
(i32.shl
(get_local $0)
(i32.const 2)
)
)
)
(unreachable)
)
(i32.const 0)
)
)
(func $assembly/index/NBodySystem#advance (; 14 ;) (type $iFv) (param $0 i32) (param $1 f64)
(func $assembly/index/NBodySystem#advance (; 13 ;) (type $iFv) (param $0 i32) (param $1 f64)
(local $2 i32)
(local $3 f64)
(local $3 i32)
(local $4 f64)
(local $5 i32)
(local $6 f64)
(local $5 f64)
(local $6 i32)
(local $7 f64)
(local $8 f64)
(local $9 i32)
(local $9 f64)
(local $10 f64)
(local $11 f64)
(local $12 f64)
@@ -2888,6 +2919,7 @@
(local $16 f64)
(local $17 f64)
(local $18 f64)
(local $19 i32)
(set_local $14
(i32.load offset=4
(tee_local $0
@@ -2902,16 +2934,41 @@
(loop $continue|0
(if
(i32.lt_s
(get_local $5)
(get_local $6)
(get_local $14)
)
(block
(set_local $15
(f64.load
(tee_local $0
(call $~lib/array/Array<Body>#__get
(get_local $13)
(get_local $5)
(if (result i32)
(i32.lt_u
(tee_local $3
(get_local $6)
)
(i32.shr_u
(i32.load
(tee_local $0
(i32.load
(tee_local $0
(get_local $13)
)
)
)
)
(i32.const 2)
)
)
(i32.load offset=8
(i32.add
(get_local $0)
(i32.shl
(get_local $3)
(i32.const 2)
)
)
)
(unreachable)
)
)
)
@@ -2926,17 +2983,17 @@
(get_local $0)
)
)
(set_local $6
(set_local $7
(f64.load offset=24
(get_local $0)
)
)
(set_local $7
(set_local $8
(f64.load offset=32
(get_local $0)
)
)
(set_local $8
(set_local $9
(f64.load offset=40
(get_local $0)
)
@@ -2946,22 +3003,22 @@
(get_local $0)
)
)
(set_local $9
(set_local $3
(i32.add
(get_local $5)
(get_local $6)
(i32.const 1)
)
)
(loop $continue|1
(if
(i32.lt_s
(get_local $9)
(get_local $3)
(get_local $14)
)
(block
(set_local $4
(set_local $5
(f64.sqrt
(tee_local $3
(tee_local $4
(f64.add
(f64.add
(f64.mul
@@ -2970,9 +3027,34 @@
(get_local $15)
(f64.load
(tee_local $2
(call $~lib/array/Array<Body>#__get
(get_local $13)
(get_local $9)
(if (result i32)
(i32.lt_u
(tee_local $19
(get_local $3)
)
(i32.shr_u
(i32.load
(tee_local $2
(i32.load
(tee_local $2
(get_local $13)
)
)
)
)
(i32.const 2)
)
)
(i32.load offset=8
(i32.add
(get_local $2)
(i32.shl
(get_local $19)
(i32.const 2)
)
)
)
(unreachable)
)
)
)
@@ -3007,31 +3089,15 @@
)
)
)
(set_local $4
(set_local $5
(f64.mul
(get_local $18)
(tee_local $3
(tee_local $4
(f64.div
(get_local $1)
(f64.mul
(get_local $3)
(get_local $4)
)
)
)
)
)
(set_local $6
(f64.sub
(get_local $6)
(f64.mul
(get_local $10)
(tee_local $3
(f64.mul
(f64.load offset=48
(get_local $2)
)
(get_local $3)
(get_local $5)
)
)
)
@@ -3041,17 +3107,33 @@
(f64.sub
(get_local $7)
(f64.mul
(get_local $11)
(get_local $3)
(get_local $10)
(tee_local $4
(f64.mul
(f64.load offset=48
(get_local $2)
)
(get_local $4)
)
)
)
)
)
(set_local $8
(f64.sub
(get_local $8)
(f64.mul
(get_local $11)
(get_local $4)
)
)
)
(set_local $9
(f64.sub
(get_local $9)
(f64.mul
(get_local $12)
(get_local $3)
(get_local $4)
)
)
)
@@ -3063,7 +3145,7 @@
)
(f64.mul
(get_local $10)
(get_local $4)
(get_local $5)
)
)
)
@@ -3075,7 +3157,7 @@
)
(f64.mul
(get_local $11)
(get_local $4)
(get_local $5)
)
)
)
@@ -3087,13 +3169,13 @@
)
(f64.mul
(get_local $12)
(get_local $4)
(get_local $5)
)
)
)
(set_local $9
(set_local $3
(i32.add
(get_local $9)
(get_local $3)
(i32.const 1)
)
)
@@ -3103,15 +3185,15 @@
)
(f64.store offset=24
(get_local $0)
(get_local $6)
(get_local $7)
)
(f64.store offset=32
(get_local $0)
(get_local $7)
(get_local $8)
)
(f64.store offset=40
(get_local $0)
(get_local $8)
(get_local $9)
)
(f64.store
(get_local $0)
@@ -3121,7 +3203,7 @@
)
(f64.mul
(get_local $1)
(get_local $6)
(get_local $7)
)
)
)
@@ -3133,7 +3215,7 @@
)
(f64.mul
(get_local $1)
(get_local $7)
(get_local $8)
)
)
)
@@ -3145,13 +3227,13 @@
)
(f64.mul
(get_local $1)
(get_local $8)
(get_local $9)
)
)
)
(set_local $5
(set_local $6
(i32.add
(get_local $5)
(get_local $6)
(i32.const 1)
)
)
@@ -3160,7 +3242,7 @@
)
)
)
(func $assembly/index/NBodySystem#energy (; 15 ;) (type $iF) (param $0 i32) (result f64)
(func $assembly/index/NBodySystem#energy (; 14 ;) (type $iF) (param $0 i32) (result f64)
(local $1 f64)
(local $2 i32)
(local $3 i32)
@@ -3171,6 +3253,7 @@
(local $8 f64)
(local $9 f64)
(local $10 f64)
(local $11 i32)
(set_local $5
(i32.load offset=4
(tee_local $0
@@ -3185,16 +3268,41 @@
(loop $continue|0
(if
(i32.lt_s
(get_local $2)
(get_local $3)
(get_local $5)
)
(block
(set_local $7
(f64.load
(tee_local $0
(call $~lib/array/Array<Body>#__get
(get_local $4)
(get_local $2)
(if (result i32)
(i32.lt_u
(tee_local $2
(get_local $3)
)
(i32.shr_u
(i32.load
(tee_local $0
(i32.load
(tee_local $0
(get_local $4)
)
)
)
)
(i32.const 2)
)
)
(i32.load offset=8
(i32.add
(get_local $0)
(i32.shl
(get_local $2)
(i32.const 2)
)
)
)
(unreachable)
)
)
)
@@ -3254,7 +3362,7 @@
)
(set_local $0
(i32.add
(get_local $2)
(get_local $3)
(i32.const 1)
)
)
@@ -3269,10 +3377,35 @@
(f64.sub
(get_local $7)
(f64.load
(tee_local $3
(call $~lib/array/Array<Body>#__get
(get_local $4)
(get_local $0)
(tee_local $2
(if (result i32)
(i32.lt_u
(tee_local $11
(get_local $0)
)
(i32.shr_u
(i32.load
(tee_local $2
(i32.load
(tee_local $2
(get_local $4)
)
)
)
)
(i32.const 2)
)
)
(i32.load offset=8
(i32.add
(get_local $2)
(i32.shl
(get_local $11)
(i32.const 2)
)
)
)
(unreachable)
)
)
)
@@ -3285,7 +3418,7 @@
(f64.mul
(get_local $10)
(f64.load offset=48
(get_local $3)
(get_local $2)
)
)
(f64.sqrt
@@ -3300,7 +3433,7 @@
(f64.sub
(get_local $8)
(f64.load offset=8
(get_local $3)
(get_local $2)
)
)
)
@@ -3312,7 +3445,7 @@
(f64.sub
(get_local $9)
(f64.load offset=16
(get_local $3)
(get_local $2)
)
)
)
@@ -3333,9 +3466,9 @@
)
)
)
(set_local $2
(set_local $3
(i32.add
(get_local $2)
(get_local $3)
(i32.const 1)
)
)
@@ -3345,7 +3478,7 @@
)
(get_local $1)
)
(func $assembly/index/step (; 16 ;) (type $F) (result f64)
(func $assembly/index/step (; 15 ;) (type $F) (result f64)
(call $assembly/index/NBodySystem#advance
(get_global $assembly/index/system)
(f64.const 0.01)
@@ -3354,7 +3487,7 @@
(get_global $assembly/index/system)
)
)
(func $assembly/index/bench (; 17 ;) (type $iv) (param $0 i32)
(func $assembly/index/bench (; 16 ;) (type $iv) (param $0 i32)
(local $1 i32)
(loop $continue|0
(if
@@ -3378,7 +3511,7 @@
)
)
)
(func $start (; 18 ;) (type $v)
(func $start (; 17 ;) (type $v)
(set_global $~lib/allocator/arena/startOffset
(i32.and
(i32.add