return JS_NewInt32(ctx, r);
}
-/* we add one extra limb to avoid having to test for overflows during the sum */
-#define SUM_PRECISE_ACC_LEN 34
-
typedef enum {
- SUM_PRECISE_STATE_MINUS_ZERO,
SUM_PRECISE_STATE_FINITE,
SUM_PRECISE_STATE_INFINITY,
SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */
SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */
} SumPreciseStateEnum;
+#define SP_LIMB_BITS 56
+#define SP_RND_BITS (SP_LIMB_BITS - 53)
+/* we add one extra limb to avoid having to test for overflows during the sum */
+#define SUM_PRECISE_ACC_LEN 39
+
+#define SUM_PRECISE_COUNTER_INIT 250
+
typedef struct {
- uint64_t acc[SUM_PRECISE_ACC_LEN];
- int n_limbs; /* acc is not necessarily normalized */
SumPreciseStateEnum state;
+ uint32_t counter;
+ int n_limbs; /* 'acc' contains n_limbs and is not necessarily
+ acc[n_limb - 1] may be 0. 0 indicates minus zero
+ result when state = SUM_PRECISE_STATE_FINITE */
+ int64_t acc[SUM_PRECISE_ACC_LEN];
} SumPreciseState;
static void sum_precise_init(SumPreciseState *s)
{
- s->state = SUM_PRECISE_STATE_MINUS_ZERO;
- s->acc[0] = 0;
- s->n_limbs = 1;
+ memset(s->acc, 0, sizeof(s->acc));
+ s->state = SUM_PRECISE_STATE_FINITE;
+ s->counter = SUM_PRECISE_COUNTER_INIT;
+ s->n_limbs = 0;
}
-#define ADDC64(res, carry_out, op1, op2, carry_in) \
-do { \
- uint64_t __v, __a, __k, __k1; \
- __v = (op1); \
- __a = __v + (op2); \
- __k1 = __a < __v; \
- __k = (carry_in); \
- __a = __a + __k; \
- carry_out = (__a < __k) | __k1; \
- res = __a; \
-} while (0)
+static void sum_precise_renorm(SumPreciseState *s)
+{
+ int64_t v, carry;
+ int i;
+
+ carry = 0;
+ for(i = 0; i < s->n_limbs; i++) {
+ v = s->acc[i] + carry;
+ s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
+ carry = v >> SP_LIMB_BITS;
+ }
+ /* we add a failsafe but it should be never reached in a
+ reasonnable amount of time */
+ if (carry != 0 && s->n_limbs < SUM_PRECISE_ACC_LEN)
+ s->acc[s->n_limbs++] = carry;
+}
static void sum_precise_add(SumPreciseState *s, double d)
{
- uint64_t a, m, a0, carry, acc_sign, a_sign;
- int sgn, e, p, n, i;
- unsigned shift;
+ uint64_t a, m, a0, a1;
+ int sgn, e, p;
+ unsigned int shift;
a = float64_as_uint64(d);
sgn = a >> 63;
} else if (e == 0) {
if (likely(m == 0)) {
/* zero */
- if (s->state == SUM_PRECISE_STATE_MINUS_ZERO && !sgn)
- s->state = SUM_PRECISE_STATE_FINITE;
+ if (s->n_limbs == 0 && !sgn)
+ s->n_limbs = 1;
} else {
/* subnormal */
p = 0;
goto add;
}
} else {
+ /* Note: we sum even if state != SUM_PRECISE_STATE_FINITE to
+ avoid tests */
m |= (uint64_t)1 << 52;
shift = e - 1;
- p = shift / 64;
- /* 'p' is the position of a0 in acc */
- shift %= 64;
+ /* 'p' is the position of a0 in acc. The division is normally
+ implementation as a multiplication by the compiler. */
+ p = shift / SP_LIMB_BITS;
+ shift %= SP_LIMB_BITS;
add:
- if (s->state >= SUM_PRECISE_STATE_INFINITY)
- return;
- s->state = SUM_PRECISE_STATE_FINITE;
- n = s->n_limbs;
-
- acc_sign = (int64_t)s->acc[n - 1] >> 63;
-
- /* sign extend acc */
- for(i = n; i <= p; i++)
- s->acc[i] = acc_sign;
-
- carry = sgn;
- a_sign = -sgn;
- a0 = m << shift;
- ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
- if (shift >= 12) {
- p++;
- if (p >= n)
- s->acc[p] = acc_sign;
- a0 = m >> (64 - shift);
- ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
- }
- p++;
- if (p >= n) {
- n = p;
+ a0 = (m << shift) & (((uint64_t)1 << SP_LIMB_BITS) - 1);
+ a1 = m >> (SP_LIMB_BITS - shift);
+ if (!sgn) {
+ s->acc[p] += a0;
+ s->acc[p + 1] += a1;
} else {
- /* carry */
- for(i = p; i < n; i++) {
- /* if 'a' positive: stop condition: carry = 0.
- if 'a' negative: stop condition: carry = 1. */
- if (carry == sgn)
- goto done;
- ADDC64(s->acc[i], carry, s->acc[i], a_sign, carry);
- }
+ s->acc[p] -= a0;
+ s->acc[p + 1] -= a1;
}
-
- /* extend the accumulator if needed */
- a0 = carry + acc_sign + a_sign;
- /* -1 <= a0 <= 1 (if both acc and a are negative, carry is set) */
- if (a0 != ((int64_t)s->acc[n - 1] >> 63)) {
- s->acc[n++] = a0;
+ s->n_limbs = max_int(s->n_limbs, p + 2);
+
+ if (unlikely(--s->counter == 0)) {
+ s->counter = SUM_PRECISE_COUNTER_INIT;
+ sum_precise_renorm(s);
}
- done:
- s->n_limbs = n;
}
}
static double sum_precise_get_result(SumPreciseState *s)
{
- int n, shift, e, p, is_neg, i;
- uint64_t m, addend, carry;
+ int n, shift, e, p, is_neg;
+ uint64_t m, addend;
if (s->state != SUM_PRECISE_STATE_FINITE) {
switch(s->state) {
default:
- case SUM_PRECISE_STATE_MINUS_ZERO:
- return -0.0;
case SUM_PRECISE_STATE_INFINITY:
return INFINITY;
case SUM_PRECISE_STATE_MINUS_INFINITY:
}
}
+ sum_precise_renorm(s);
+
/* extract the sign and absolute value */
- n = s->n_limbs;
- is_neg = s->acc[n - 1] >> 63;
- if (is_neg) {
- /* acc = -acc */
- carry = 1;
- for(i = 0; i < n; i++) {
- ADDC64(s->acc[i], carry, ~s->acc[i], 0, carry);
- }
- }
- /* normalize */
- while (n > 0 && s->acc[n - 1] == 0)
- n--;
#if 0
{
- printf("res=");
- for(i = n - 1; i >= 0; i--)
- printf(" %016lx", s->acc[i]);
+ int i;
+ printf("len=%d:", s->n_limbs);
+ for(i = s->n_limbs - 1; i >= 0; i--)
+ printf(" %014lx", s->acc[i]);
printf("\n");
}
#endif
+ n = s->n_limbs;
+ /* minus zero result */
+ if (n == 0)
+ return -0.0;
+
+ /* normalize */
+ while (n > 0 && s->acc[n - 1] == 0)
+ n--;
/* zero result. The spec tells it is always positive in the finite case */
if (n == 0)
return 0.0;
+ is_neg = (s->acc[n - 1] < 0);
+ if (is_neg) {
+ uint64_t v, carry;
+ int i;
+ /* negate */
+ /* XXX: do it only when needed */
+ carry = 1;
+ for(i = 0; i < n - 1; i++) {
+ v = (((uint64_t)1 << SP_LIMB_BITS) - 1) - s->acc[i] + carry;
+ carry = v >> SP_LIMB_BITS;
+ s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
+ }
+ s->acc[n - 1] = -s->acc[n - 1] + carry - 1;
+ while (n > 1 && s->acc[n - 1] == 0)
+ n--;
+ }
/* subnormal case */
if (n == 1 && s->acc[0] < ((uint64_t)1 << 52))
return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]);
/* normal case */
- e = n * 64;
+ e = n * SP_LIMB_BITS;
p = n - 1;
m = s->acc[p];
- shift = clz64(m);
+ shift = clz64(m) - (64 - SP_LIMB_BITS);
e = e - shift - 52;
if (shift != 0) {
m <<= shift;
int shift1;
uint64_t nz;
p--;
- shift1 = 64 - shift;
+ shift1 = SP_LIMB_BITS - shift;
nz = s->acc[p] & (((uint64_t)1 << shift1) - 1);
m = m | (s->acc[p] >> shift1) | (nz != 0);
}
}
- if ((m & ((1 << 10) - 1)) == 0) {
+ if ((m & ((1 << SP_RND_BITS) - 1)) == (1 << (SP_RND_BITS - 1))) {
/* see if the LSB part is non zero for the final rounding */
while (p > 0) {
p--;
}
}
/* rounding to nearest with ties to even */
- addend = (1 << 10) - 1 + ((m >> 11) & 1);
- m = (m + addend) >> 11;
+ addend = (1 << (SP_RND_BITS - 1)) - 1 + ((m >> SP_RND_BITS) & 1);
+ m = (m + addend) >> SP_RND_BITS;
/* handle overflow in the rounding */
- if (m == 0)
+ if (m == ((uint64_t)1 << 53))
e++;
if (unlikely(e >= 2047)) {
/* infinity */