simplified math.sumPrecise()

author Fabrice Bellard <fabrice@bellard.org>

Mon, 29 Sep 2025 14:12:23 +0000 (16:12 +0200)

committer Fabrice Bellard <fabrice@bellard.org>

Mon, 29 Sep 2025 14:12:23 +0000 (16:12 +0200)
author Fabrice Bellard <fabrice@bellard.org>
Mon, 29 Sep 2025 14:12:23 +0000 (16:12 +0200)
committer Fabrice Bellard <fabrice@bellard.org>
Mon, 29 Sep 2025 14:12:23 +0000 (16:12 +0200)
diff --git a/quickjs.c b/quickjs.c

index 2da7f522cdecb22b3484743771695d9bdee72181..f0f155db710f8c2351df638b1e39d05814df7c9d 100644 (file)
--- a/quickjs.c
+++ b/quickjs.c
@@ -45458,47 +45458,59 @@ static JSValue js_math_clz32(JSContext *ctx, JSValueConst this_val,
      return JS_NewInt32(ctx, r);
  }
  
-/* we add one extra limb to avoid having to test for overflows during the sum */
-#define SUM_PRECISE_ACC_LEN 34
-
  typedef enum {
-    SUM_PRECISE_STATE_MINUS_ZERO,
      SUM_PRECISE_STATE_FINITE,
      SUM_PRECISE_STATE_INFINITY,
      SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */
      SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */
  } SumPreciseStateEnum;
  
+#define SP_LIMB_BITS 56
+#define SP_RND_BITS (SP_LIMB_BITS - 53)
+/* we add one extra limb to avoid having to test for overflows during the sum */
+#define SUM_PRECISE_ACC_LEN 39
+
+#define SUM_PRECISE_COUNTER_INIT 250
+
  typedef struct {
-    uint64_t acc[SUM_PRECISE_ACC_LEN];
-    int n_limbs; /* acc is not necessarily normalized */
      SumPreciseStateEnum state;
+    uint32_t counter;
+    int n_limbs; /* 'acc' contains n_limbs and is not necessarily
+                    acc[n_limb - 1] may be 0. 0 indicates minus zero
+                    result when state = SUM_PRECISE_STATE_FINITE */
+    int64_t acc[SUM_PRECISE_ACC_LEN];
  } SumPreciseState;
  
  static void sum_precise_init(SumPreciseState *s)
  {
-    s->state = SUM_PRECISE_STATE_MINUS_ZERO;
-    s->acc[0] = 0;
-    s->n_limbs = 1;
+    memset(s->acc, 0, sizeof(s->acc));
+    s->state = SUM_PRECISE_STATE_FINITE;
+    s->counter = SUM_PRECISE_COUNTER_INIT;
+    s->n_limbs = 0;
  }
  
-#define ADDC64(res, carry_out, op1, op2, carry_in)      \
-do {                                                    \
-    uint64_t __v, __a, __k, __k1;                       \
-    __v = (op1);                                        \
-    __a = __v + (op2);                                  \
-    __k1 = __a < __v;                                   \
-    __k = (carry_in);                                   \
-    __a = __a + __k;                                    \
-    carry_out = (__a < __k) | __k1;                     \
-    res = __a;                                          \
-} while (0)
+static void sum_precise_renorm(SumPreciseState *s)
+{
+    int64_t v, carry;
+    int i;
+    
+    carry = 0;
+    for(i = 0; i < s->n_limbs; i++) {
+        v = s->acc[i] + carry;
+        s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
+        carry = v >> SP_LIMB_BITS;
+    }
+    /* we add a failsafe but it should be never reached in a
+       reasonnable amount of time */
+    if (carry != 0 && s->n_limbs < SUM_PRECISE_ACC_LEN)
+        s->acc[s->n_limbs++] = carry;
+}
  
  static void sum_precise_add(SumPreciseState *s, double d)
  {
-    uint64_t a, m, a0, carry, acc_sign, a_sign;
-    int sgn, e, p, n, i;
-    unsigned shift;
+    uint64_t a, m, a0, a1;
+    int sgn, e, p;
+    unsigned int shift;
      
      a = float64_as_uint64(d);
      sgn = a >> 63;
@@ -45521,8 +45533,8 @@ static void sum_precise_add(SumPreciseState *s, double d)
      } else if (e == 0) {
          if (likely(m == 0)) {
              /* zero */
-            if (s->state == SUM_PRECISE_STATE_MINUS_ZERO && !sgn)
-                s->state = SUM_PRECISE_STATE_FINITE;
+            if (s->n_limbs == 0 && !sgn)
+                s->n_limbs = 1;
          } else {
              /* subnormal */
              p = 0;
@@ -45530,69 +45542,41 @@ static void sum_precise_add(SumPreciseState *s, double d)
              goto add;
          }
      } else {
+        /* Note: we sum even if state != SUM_PRECISE_STATE_FINITE to
+           avoid tests */
          m |= (uint64_t)1 << 52;
          shift = e - 1;
-        p = shift / 64;
-        /* 'p' is the position of a0 in acc */
-        shift %= 64;
+        /* 'p' is the position of a0 in acc. The division is normally
+           implementation as a multiplication by the compiler. */
+        p = shift / SP_LIMB_BITS;
+        shift %= SP_LIMB_BITS;
      add:
-        if (s->state >= SUM_PRECISE_STATE_INFINITY)
-            return;
-        s->state = SUM_PRECISE_STATE_FINITE;
-        n = s->n_limbs;
-
-        acc_sign = (int64_t)s->acc[n - 1] >> 63;
-
-        /* sign extend acc */
-        for(i = n; i <= p; i++)
-            s->acc[i] = acc_sign;
-        
-        carry = sgn;
-        a_sign = -sgn;
-        a0 = m << shift;
-        ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
-        if (shift >= 12) {
-            p++;
-            if (p >= n)
-                s->acc[p] = acc_sign;
-            a0 = m >> (64 - shift);
-            ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
-        }
-        p++;
-        if (p >= n) {
-            n = p;
+        a0 = (m << shift) & (((uint64_t)1 << SP_LIMB_BITS) - 1);
+        a1 = m >> (SP_LIMB_BITS - shift);
+        if (!sgn) {
+            s->acc[p] += a0;
+            s->acc[p + 1] += a1;
          } else {
-            /* carry */
-            for(i = p; i < n; i++) {
-                /* if 'a' positive: stop condition: carry = 0.
-                   if 'a' negative: stop condition: carry = 1. */
-                if (carry == sgn)
-                    goto done;
-                ADDC64(s->acc[i], carry, s->acc[i], a_sign, carry);
-            }
+            s->acc[p] -= a0;
+            s->acc[p + 1] -= a1;
          }
-        
-        /* extend the accumulator if needed */
-        a0 = carry + acc_sign + a_sign;
-        /* -1 <= a0 <= 1 (if both acc and a are negative, carry is set) */
-        if (a0 != ((int64_t)s->acc[n - 1] >> 63)) {
-            s->acc[n++] = a0;
+        s->n_limbs = max_int(s->n_limbs, p + 2);
+
+        if (unlikely(--s->counter == 0)) {
+            s->counter = SUM_PRECISE_COUNTER_INIT;
+            sum_precise_renorm(s);
          }
-    done:
-        s->n_limbs = n;
      }
  }
  
  static double sum_precise_get_result(SumPreciseState *s)
  {
-    int n, shift, e, p, is_neg, i;
-    uint64_t m, addend, carry;
+    int n, shift, e, p, is_neg;
+    uint64_t m, addend;
          
      if (s->state != SUM_PRECISE_STATE_FINITE) {
          switch(s->state) {
          default:
-        case SUM_PRECISE_STATE_MINUS_ZERO:
-            return -0.0;
          case SUM_PRECISE_STATE_INFINITY:
              return INFINITY;
          case SUM_PRECISE_STATE_MINUS_INFINITY:
@@ -45602,38 +45586,53 @@ static double sum_precise_get_result(SumPreciseState *s)
          }
      }
  
+    sum_precise_renorm(s);
+
      /* extract the sign and absolute value */
-    n = s->n_limbs;
-    is_neg = s->acc[n - 1] >> 63;
-    if (is_neg) {
-        /* acc = -acc */
-        carry = 1;
-        for(i = 0; i < n; i++) {
-            ADDC64(s->acc[i], carry, ~s->acc[i], 0, carry);
-        }
-    }
-    /* normalize */
-    while (n > 0 && s->acc[n - 1] == 0)
-        n--;
  #if 0
      {
-        printf("res=");
-        for(i = n - 1; i >= 0; i--)
-            printf(" %016lx", s->acc[i]);
+        int i;
+        printf("len=%d:", s->n_limbs);
+        for(i = s->n_limbs - 1; i >= 0; i--)
+            printf(" %014lx", s->acc[i]);
          printf("\n");
      }
  #endif
+    n = s->n_limbs;
+    /* minus zero result */
+    if (n == 0)
+        return -0.0;
+    
+    /* normalize */
+    while (n > 0 && s->acc[n - 1] == 0)
+        n--;
      /* zero result. The spec tells it is always positive in the finite case */
      if (n == 0)
          return 0.0;
+    is_neg = (s->acc[n - 1] < 0);
+    if (is_neg) {
+        uint64_t v, carry;
+        int i;
+        /* negate */
+        /* XXX: do it only when needed */
+        carry = 1;
+        for(i = 0; i < n - 1; i++) {
+            v = (((uint64_t)1 << SP_LIMB_BITS) - 1) - s->acc[i] + carry;
+            carry = v >> SP_LIMB_BITS;
+            s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
+        }
+        s->acc[n - 1] = -s->acc[n - 1] + carry - 1;
+        while (n > 1 && s->acc[n - 1] == 0)
+            n--;
+    }
      /* subnormal case */
      if (n == 1 && s->acc[0] < ((uint64_t)1 << 52))
          return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]); 
      /* normal case */
-    e = n * 64;
+    e = n * SP_LIMB_BITS;
      p = n - 1;
      m = s->acc[p];
-    shift = clz64(m);
+    shift = clz64(m) - (64 - SP_LIMB_BITS);
      e = e - shift - 52;
      if (shift != 0) {
          m <<= shift;
@@ -45641,12 +45640,12 @@ static double sum_precise_get_result(SumPreciseState *s)
              int shift1;
              uint64_t nz;
              p--;
-            shift1 = 64 - shift;
+            shift1 = SP_LIMB_BITS - shift;
              nz = s->acc[p] & (((uint64_t)1 << shift1) - 1);
              m = m | (s->acc[p] >> shift1) | (nz != 0);
          }
      }
-    if ((m & ((1 << 10) - 1)) == 0) {
+    if ((m & ((1 << SP_RND_BITS) - 1)) == (1 << (SP_RND_BITS - 1))) {
          /* see if the LSB part is non zero for the final rounding  */
          while (p > 0) {
              p--;
@@ -45657,10 +45656,10 @@ static double sum_precise_get_result(SumPreciseState *s)
          }
      }
      /* rounding to nearest with ties to even */
-    addend = (1 << 10) - 1 + ((m >> 11) & 1);
-    m = (m + addend) >> 11;
+    addend = (1 << (SP_RND_BITS - 1)) - 1 + ((m >> SP_RND_BITS) & 1);
+    m = (m + addend) >> SP_RND_BITS;
      /* handle overflow in the rounding */
-    if (m == 0)
+    if (m == ((uint64_t)1 << 53))
          e++;
      if (unlikely(e >= 2047)) {
          /* infinity */
author	Fabrice Bellard <fabrice@bellard.org>
	Mon, 29 Sep 2025 14:12:23 +0000 (16:12 +0200)
committer	Fabrice Bellard <fabrice@bellard.org>
	Mon, 29 Sep 2025 14:12:23 +0000 (16:12 +0200)