Implement Mathf.sin/cos/tan (#491)

2025-06-12 06:21:29 +00:00 · 2019-03-05 20:36:22 +02:00
parent 8e5b9c734b
commit 208dc2f1de
7 changed files with 29448 additions and 20485 deletions
--- a/std/assembly/index.d.ts
+++ b/std/assembly/index.d.ts
@ -1401,12 +1401,18 @@ interface IMath<T> {
 }

 interface INativeMath<T> extends IMath<T> {
+  /** Contains sin value produced after Math/Mathf.sincos */
+  sincos_sin: T;
+  /** Contains cos value produced after Math/Mathf.sincos */
+  sincos_cos: T;
  /** Seeds the random number generator. */
  seedRandom(value: i64): void;
  /** Returns the floating-point remainder of `x / y` (rounded towards zero). */
  mod(x: T, y: T): T;
  /** Returns the floating-point remainder of `x / y` (rounded to nearest). */
  rem(x: T, y: T): T;
+  /** Returns sin and cos simultaneously for same angle. Results stored to `sincos_s32/64` and `sincos_c32/64` globals */
+  sincos(x: T): void;
 }

 /** Double precision math imported from JavaScript. */
--- a/std/assembly/math.ts
+++ b/std/assembly/math.ts
@ -22,8 +22,9 @@ import {
 //
 // Applies to all functions marked with a comment referring here.

-// TODO: sin, cos, tan
+// TODO: sin, cos, tan for Math namespace

+/** @internal */
 function R(z: f64): f64 { // Rational approximation of (asin(x)-x)/x^3
  const                   // see: musl/src/math/asin.c and SUN COPYRIGHT NOTICE above
    pS0 = reinterpret<f64>(0x3FC5555555555555), //  1.66666666666666657415e-01
@ -49,6 +50,7 @@ function R(z: f64): f64 { // Rational approximation of (asin(x)-x)/x^3
  return NativeMath.exp(x - kln2) * scale * scale;
 }

+/** @internal */
@lazy var random_seeded = false;
@lazy var random_state0_64: u64;
@lazy var random_state1_64: u64;
@ -1092,7 +1094,7 @@ export namespace NativeMath {
      }
    } else if (n < -1022) {
      /* make sure final n < -53 to avoid double
-		   rounding in the subnormal range */
+       rounding in the subnormal range */
      y *= Ox1p_1022 * Ox1p53;
      n += 1022 - 53;
      if (n < -1022) {
@ -1230,6 +1232,16 @@ export namespace NativeMath {
  }
 }

+/** @internal */
+@lazy var rempio2f_y: f64;
+@lazy const PIO2_TABLE: u64[] = [
+  0xA2F9836E4E441529,
+  0xFC2757D1F534DDC0,
+  0xDB6295993C439041,
+  0xFE5163ABDEBBC561
+];
+
+/** @internal */
 function Rf(z: f32): f32 { // Rational approximation of (asin(x)-x)/x^3
  const                    // see: musl/src/math/asinf.c and SUN COPYRIGHT NOTICE above
    pS0 = reinterpret<f32>(0x3E2AAA75), //  1.6666586697e-01f
@ -1242,13 +1254,110 @@ function Rf(z: f32): f32 { // Rational approximation of (asin(x)-x)/x^3
 }

@inline function expo2f(x: f32): f32 { // exp(x)/2 for x >= log(DBL_MAX)
-  const                        // see: musl/src/math/__expo2f.c
+  const                                // see: musl/src/math/__expo2f.c
    k    = <u32>235,
    kln2 = reinterpret<f32>(0x4322E3BC); // 0x1.45c778p+7f
  var scale = reinterpret<f32>(<u32>(0x7F + (k >> 1)) << 23);
  return NativeMathf.exp(x - kln2) * scale * scale;
 }

+@inline /** @internal */
+function pio2_large_quot(x: f32, u: i32): i32 {       // see: jdh8/metallic/blob/master/src/math/float/rem_pio2f.c
+  const coeff = reinterpret<f64>(0x3BF921FB54442D18); // π * 0x1p-65 = 8.51530395021638647334e-20
+  const bits = PIO2_TABLE;
+
+  var offset = (u >> 23) - 152;
+  var index  = offset >> 6;
+  var shift  = offset & 63;
+
+  var b0 = unchecked(bits[index + 0]);
+  var b1 = unchecked(bits[index + 1]);
+  var lo: u64;
+
+  if (shift > 32) {
+    let b2 = unchecked(bits[index + 2]);
+    lo  = b2 >> (96 - shift);
+    lo |= b1 << (shift - 32);
+  } else {
+    lo = b1 >> (32 - shift);
+  }
+
+  var hi = (b1 >> (64 - shift)) | (b0 << shift);
+  var mantissa: u64 = (u & 0x007FFFFF) | 0x00800000;
+  var product: u64 = mantissa * hi + (mantissa * lo >> 32);
+  var r: i64 = product << 2;
+  var q: i32 = <i32>((product >> 62) + (r >>> 63));
+  rempio2f_y = copysign<f64>(coeff, x) * <f64>r;
+  return q;
+}
+
+@inline /** @internal */
+function rempio2f(x: f32, u: u32, sign: i32): i32 {   // see: jdh8/metallic/blob/master/src/math/float/rem_pio2f.c
+  const pi2hi = reinterpret<f64>(0x3FF921FB50000000); // 1.57079631090164184570
+  const pi2lo = reinterpret<f64>(0x3E5110B4611A6263); // 1.58932547735281966916e-8
+  const _2_pi = reinterpret<f64>(0x3FE45F306DC9C883); // 0.63661977236758134308
+
+  if (u < 0x4DC90FDB) { /* π * 0x1p28 */
+    let q = nearest(x * _2_pi);
+    rempio2f_y = x - q * pi2hi - q * pi2lo;
+    return <i32>q;
+  }
+
+  var q = pio2_large_quot(x, u);
+  return select(-q, q, sign);
+}
+
+/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */
+@inline /** @internal */
+function sin_kernf(x: f64): f32 { // see: musl/tree/src/math/__sindf.c
+  const S1 = reinterpret<f64>(0xBFC5555554CBAC77); // -0x15555554cbac77.0p-55
+  const S2 = reinterpret<f64>(0x3F811110896EFBB2); //  0x111110896efbb2.0p-59
+  const S3 = reinterpret<f64>(0xBF2A00F9E2CAE774); // -0x1a00f9e2cae774.0p-65
+  const S4 = reinterpret<f64>(0x3EC6CD878C3B46A7); //  0x16cd878c3b46a7.0p-71
+
+  var z = x * x;
+  var w = z * z;
+  var r = S3 + z * S4;
+  var s = z * x;
+  return <f32>((x + s * (S1 + z * S2)) + s * w * r);
+}
+
+/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
+@inline /** @internal */
+function cos_kernf(x: f64): f32 { // see: musl/tree/src/math/__cosdf.c
+  const C0 = reinterpret<f64>(0xBFDFFFFFFD0C5E81); // -0x1ffffffd0c5e81.0p-54
+  const C1 = reinterpret<f64>(0x3FA55553E1053A42); //  0x155553e1053a42.0p-57
+  const C2 = reinterpret<f64>(0xBF56C087E80F1E27); // -0x16c087e80f1e27.0p-62
+  const C3 = reinterpret<f64>(0x3EF99342E0EE5069); //  0x199342e0ee5069.0p-68
+
+  var z = x * x;
+  var w = z * z;
+  var r = C2 + z * C3;
+  return <f32>(((1 + z * C0) + w * C1) + (w * z) * r);
+}
+
+/* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). */
+@inline /** @internal */
+function tan_kernf(x: f64, odd: i32): f32 { // see: musl/tree/src/math/__tandf.c
+
+  const T0 = reinterpret<f64>(0x3FD5554D3418C99F); /* 0x15554d3418c99f.0p-54 */
+  const T1 = reinterpret<f64>(0x3FC112FD38999F72); /* 0x1112fd38999f72.0p-55 */
+  const T2 = reinterpret<f64>(0x3FAB54C91D865AFE); /* 0x1b54c91d865afe.0p-57 */
+  const T3 = reinterpret<f64>(0x3F991DF3908C33CE); /* 0x191df3908c33ce.0p-58 */
+  const T4 = reinterpret<f64>(0x3F685DADFCECF44E); /* 0x185dadfcecf44e.0p-61 */
+  const T5 = reinterpret<f64>(0x3F8362B9BF971BCD); /* 0x1362b9bf971bcd.0p-59 */
+
+  var z = x * x;
+  var r = T4 + z * T5;
+  var t = T2 + z * T3;
+  var w = z * z;
+  var s = z * x;
+  var u = T0 + z * T1;
+
+  r = (x + s * u) + (s * w) * (t + w * r);
+  return <f32>(odd ? -1 / r : r);
+}
+
 export namespace NativeMathf {

  @lazy export const E       = <f32>NativeMath.E;
@ -1260,6 +1369,10 @@ export namespace NativeMathf {
  @lazy export const SQRT1_2 = <f32>NativeMath.SQRT1_2;
  @lazy export const SQRT2   = <f32>NativeMath.SQRT2;

+  /** Used as return values from Mathf.sincos */
+  @lazy export var sincos_sin: f32 = 0;
+  @lazy export var sincos_cos: f32 = 0;
+
  @inline
  export function abs(x: f32): f32 {
    return builtin_abs<f32>(x);
@ -1508,9 +1621,50 @@ export namespace NativeMathf {
    );
  }

-  export function cos(x: f32): f32 { // TODO
-    unreachable();
-    return 0;
+  export function cos(x: f32): f32 { // see: musl/src/math/cosf.c
+    const c1pio2 = reinterpret<f64>(0x3FF921FB54442D18); // M_PI_2 * 1
+    const c2pio2 = reinterpret<f64>(0x400921FB54442D18); // M_PI_2 * 2
+    const c3pio2 = reinterpret<f64>(0x4012D97C7F3321D2); // M_PI_2 * 3
+    const c4pio2 = reinterpret<f64>(0x401921FB54442D18); // M_PI_2 * 4
+
+    var ix = reinterpret<u32>(x);
+    var sign = ix >> 31;
+    ix &= 0x7FFFFFFF;
+
+    if (ix <= 0x3f490fda) {  /* |x| ~<= π/4 */
+      if (ix < 0x39800000) { /* |x| < 2**-12 */
+        /* raise inexact if x != 0 */
+        return 1;
+      }
+      return cos_kernf(x);
+    }
+
+    if (ASC_SHRINK_LEVEL < 1) {
+      if (ix <= 0x407b53d1) {  /* |x| ~<= 5π/4 */
+        if (ix > 0x4016cbe3) { /* |x|  ~> 3π/4 */
+          return -cos_kernf(sign ? x + c2pio2 : x - c2pio2);
+        } else {
+          return sign ? sin_kernf(x + c1pio2) : sin_kernf(c1pio2 - x);
+        }
+      }
+      if (ix <= 0x40e231d5) {  /* |x| ~<= 9π/4 */
+        if (ix > 0x40afeddf) { /* |x|  ~> 7π/4 */
+          return cos_kernf(sign ? x + c4pio2 : x - c4pio2);
+        } else {
+          return sign ? sin_kernf(-x - c3pio2) : sin_kernf(x - c3pio2);
+        }
+      }
+    }
+
+    /* cos(Inf or NaN) is NaN */
+    if (ix >= 0x7f800000) return x - x;
+
+    /* general argument reduction needed */
+    var n = rempio2f(x, ix, sign);
+    var y = rempio2f_y;
+
+    var t = n & 1 ? sin_kernf(y) : cos_kernf(y);
+    return (n + 1) & 2 ? -t : t;
  }

  export function cosh(x: f32): f32 { // see: musl/src/math/coshf.c
@ -2094,9 +2248,47 @@ export namespace NativeMathf {
    return <bool>((reinterpret<u32>(x) >>> 31) & (x == x));
  }

-  export function sin(x: f32): f32 { // TODO
-    unreachable();
-    return 0;
+  export function sin(x: f32): f32 { // see: musl/src/math/sinf.c
+    const s1pio2 = reinterpret<f64>(0x3FF921FB54442D18); // M_PI_2 * 1
+    const s2pio2 = reinterpret<f64>(0x400921FB54442D18); // M_PI_2 * 2
+    const s3pio2 = reinterpret<f64>(0x4012D97C7F3321D2); // M_PI_2 * 3
+    const s4pio2 = reinterpret<f64>(0x401921FB54442D18); // M_PI_2 * 4
+
+    var ix = reinterpret<u32>(x);
+    var sign = ix >> 31;
+    ix &= 0x7FFFFFFF;
+
+    if (ix <= 0x3f490fda) {  /* |x| ~<= π/4 */
+      if (ix < 0x39800000) { /* |x| < 2**-12 */
+        return x;
+      }
+      return sin_kernf(x);
+    }
+
+    if (ASC_SHRINK_LEVEL < 1) {
+      if (ix <= 0x407b53d1) {   /* |x| ~<= 5π/4 */
+        if (ix <= 0x4016cbe3) { /* |x| ~<= 3π/4 */
+          return sign ? -cos_kernf(x + s1pio2) : cos_kernf(x - s1pio2);
+        }
+        return sin_kernf(-(sign ? x + s2pio2 : x - s2pio2));
+      }
+
+      if (ix <= 0x40e231d5) {   /* |x| ~<= 9π/4 */
+        if (ix <= 0x40afeddf) { /* |x| ~<= 7π/4 */
+          return sign ? cos_kernf(x + s3pio2) : -cos_kernf(x - s3pio2);
+        }
+        return sin_kernf(sign ? x + s4pio2 : x - s4pio2);
+      }
+    }
+
+    /* sin(Inf or NaN) is NaN */
+    if (ix >= 0x7f800000) return x - x;
+
+    var n = rempio2f(x, ix, sign);
+    var y = rempio2f_y;
+
+    var t = n & 1 ? cos_kernf(y) : sin_kernf(y);
+    return n & 2 ? -t : t;
  }

  export function sinh(x: f32): f32 { // see: musl/src/math/sinhf.c
@ -2121,9 +2313,44 @@ export namespace NativeMathf {
    return builtin_sqrt<f32>(x);
  }

-  export function tan(x: f32): f32 { // TODO
-    unreachable();
-    return 0;
+  export function tan(x: f32): f32 { // see: musl/src/math/tanf.c
+    const t1pio2 = reinterpret<f64>(0x3FF921FB54442D18); // 1 * M_PI_2
+    const t2pio2 = reinterpret<f64>(0x400921FB54442D18); // 2 * M_PI_2
+    const t3pio2 = reinterpret<f64>(0x4012D97C7F3321D2); // 3 * M_PI_2
+    const t4pio2 = reinterpret<f64>(0x401921FB54442D18); // 4 * M_PI_2
+
+    var ix = reinterpret<u32>(x);
+    var sign = ix >> 31;
+    ix &= 0x7FFFFFFF;
+
+    if (ix <= 0x3f490fda) {  /* |x| ~<= π/4 */
+      if (ix < 0x39800000) { /* |x| < 2**-12 */
+        return x;
+      }
+      return tan_kernf(x, 0);
+    }
+    if (ix <= 0x407b53d1) {   /* |x| ~<= 5π/4 */
+      if (ix <= 0x4016cbe3) { /* |x| ~<= 3π/4 */
+        return tan_kernf((sign ? x + t1pio2 : x - t1pio2), 1);
+      } else {
+        return tan_kernf((sign ? x + t2pio2 : x - t2pio2), 0);
+      }
+    }
+    if (ix <= 0x40e231d5) {   /* |x| ~<= 9π/4 */
+      if (ix <= 0x40afeddf) { /* |x| ~<= 7π/4 */
+        return tan_kernf((sign ? x + t3pio2 : x - t3pio2), 1);
+      } else {
+        return tan_kernf((sign ? x + t4pio2 : x - t4pio2), 0);
+      }
+    }
+
+    /* tan(Inf or NaN) is NaN */
+    if (ix >= 0x7f800000) return x - x;
+
+    /* argument reduction */
+    var n = rempio2f(x, ix, sign);
+    var y = rempio2f_y;
+    return tan_kernf(y, n & 1);
  }

  export function tanh(x: f32): f32 { // see: musl/src/math/tanhf.c
@ -2298,6 +2525,101 @@ export namespace NativeMathf {
    }
    return sx ? -x : x;
  }
+
+  export function sincos(x: f32): void { // see: musl/tree/src/math/sincosf.c
+    const s1pio2 = reinterpret<f64>(0x3FF921FB54442D18); // 1 * M_PI_2
+    const s2pio2 = reinterpret<f64>(0x400921FB54442D18); // 2 * M_PI_2
+    const s3pio2 = reinterpret<f64>(0x4012D97C7F3321D2); // 3 * M_PI_2
+    const s4pio2 = reinterpret<f64>(0x401921FB54442D18); // 4 * M_PI_2
+
+    var ix = reinterpret<u32>(x);
+    var sign = ix >> 31;
+    ix &= 0x7fffffff;
+
+    if (ix <= 0x3f490fda) {  /* |x| ~<= π/4 */
+      if (ix < 0x39800000) { /* |x| < 2**-12 */
+        sincos_s32 = x;
+        sincos_c32 = 1;
+        return;
+      }
+      sincos_s32 = sin_kernf(x);
+      sincos_c32 = cos_kernf(x);
+      return;
+    }
+
+    if (ASC_SHRINK_LEVEL < 1) {
+      if (ix <= 0x407b53d1) {   /* |x| ~<= 5π/4 */
+        if (ix <= 0x4016cbe3) { /* |x| ~<= 3π/4 */
+          if (sign) {
+            sincos_s32 = -cos_kernf(x + s1pio2);
+            sincos_c32 =  sin_kernf(x + s1pio2);
+          } else {
+            sincos_s32 = cos_kernf(s1pio2 - x);
+            sincos_c32 = sin_kernf(s1pio2 - x);
+          }
+          return;
+        }
+        /* -sin(x + c) is not correct if x+c could be 0: -0 vs +0 */
+        sincos_s32 = -sin_kernf(sign ? x + s2pio2 : x - s2pio2);
+        sincos_c32 = -cos_kernf(sign ? x + s2pio2 : x - s2pio2);
+        return;
+      }
+
+      if (ix <= 0x40e231d5) {   /* |x| ~<= 9π/4 */
+        if (ix <= 0x40afeddf) { /* |x| ~<= 7π/4 */
+          if (sign) {
+            sincos_s32 =  cos_kernf(x + s3pio2);
+            sincos_c32 = -sin_kernf(x + s3pio2);
+          } else {
+            sincos_s32 = -cos_kernf(x - s3pio2);
+            sincos_c32 =  sin_kernf(x - s3pio2);
+          }
+          return;
+        }
+        sincos_s32 = sin_kernf(sign ? x + s4pio2 : x - s4pio2);
+        sincos_c32 = cos_kernf(sign ? x + s4pio2 : x - s4pio2);
+        return;
+      }
+    }
+
+    /* sin(Inf or NaN) is NaN */
+    if (ix >= 0x7f800000) {
+      let xx = x - x;
+      sincos_s32 = xx;
+      sincos_c32 = xx;
+      return;
+    }
+
+    /* general argument reduction needed */
+    var n = rempio2f(x, ix, sign);
+    var y = rempio2f_y;
+    var s = sin_kernf(y);
+    var c = cos_kernf(y);
+
+    switch (n & 3) {
+      case 0: {
+        sincos_s32 = s;
+        sincos_c32 = c;
+        break;
+      }
+      case 1: {
+        sincos_s32 =  c;
+        sincos_c32 = -s;
+        break;
+      }
+      case 2: {
+        sincos_s32 = -s;
+        sincos_c32 = -c;
+        break;
+      }
+      case 3:
+      default: {
+        sincos_s32 = -c;
+        sincos_c32 =  s;
+        break;
+      }
+    }
+  }
 }

 export function ipow32(x: i32, e: i32): i32 {