diff --git a/arb.h b/arb.h index 6b1008510..1fa01aaa6 100644 --- a/arb.h +++ b/arb.h @@ -413,6 +413,8 @@ void arb_mul_si(arb_t z, const arb_t x, slong y, slong prec); void arb_mul_ui(arb_t z, const arb_t x, ulong y, slong prec); void arb_mul_fmpz(arb_t z, const arb_t x, const fmpz_t y, slong prec); +void arb_sqr(arb_t res, const arb_t x, slong prec); + void arb_addmul(arb_t z, const arb_t x, const arb_t y, slong prec); void arb_addmul_arf(arb_t z, const arb_t x, const arf_t y, slong prec); void arb_addmul_si(arb_t z, const arb_t x, slong y, slong prec); @@ -484,9 +486,27 @@ void arb_rsqrt(arb_t z, const arb_t x, slong prec); void arb_rsqrt_ui(arb_t z, ulong x, slong prec); void arb_sqrt1pm1(arb_t r, const arb_t z, slong prec); -void arb_pow_fmpz_binexp(arb_t y, const arb_t b, const fmpz_t e, slong prec); -void arb_pow_fmpz(arb_t y, const arb_t b, const fmpz_t e, slong prec); -void arb_pow_ui(arb_t y, const arb_t b, ulong e, slong prec); +void _arb_pow_mpz_binexp(arb_t y, const arb_t b, mpz_srcptr e, slong prec); +void arb_pow_ui_binexp(arb_t y, const arb_t b, ulong e, slong prec); +void arb_pow_si(arb_t y, const arb_t b, slong e, slong prec); +ARB_INLINE void +arb_pow_fmpz_binexp(arb_t y, const arb_t b, const fmpz_t e, slong prec) +{ + if (!COEFF_IS_MPZ(*e)) + arb_pow_si(y, b, *e, prec); + else + _arb_pow_mpz_binexp(y, b, COEFF_TO_PTR(*e), prec); +} +ARB_INLINE void +arb_pow_fmpz(arb_t y, const arb_t b, const fmpz_t e, slong prec) +{ + arb_pow_fmpz_binexp(y, b, e, prec); +} +ARB_INLINE void +arb_pow_ui(arb_t y, const arb_t b, ulong e, slong prec) +{ + arb_pow_ui_binexp(y, b, e, prec); +} void arb_ui_pow_ui(arb_t y, ulong b, ulong e, slong prec); void arb_si_pow_ui(arb_t y, slong b, ulong e, slong prec); void arb_pow_fmpq(arb_t y, const arb_t x, const fmpq_t a, slong prec); @@ -620,12 +640,6 @@ void arb_primorial_ui(arb_t res, ulong n, slong prec); void arb_lambertw(arb_t res, const arb_t x, int flags, slong prec); -ARB_INLINE void -arb_sqr(arb_t res, const arb_t val, slong prec) -{ - arb_mul(res, val, val, prec); -} - #define ARB_DEF_CACHED_CONSTANT(name, comp_func) \ TLS_PREFIX slong name ## _cached_prec = 0; \ TLS_PREFIX arb_t name ## _cached_value; \ diff --git a/arb/pow_binexp.c b/arb/pow_binexp.c new file mode 100644 index 000000000..9e21ca5d9 --- /dev/null +++ b/arb/pow_binexp.c @@ -0,0 +1,100 @@ +/* + Copyright (C) 2012, 2013 Fredrik Johansson + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arb.h" + +void +_arb_pow_mpz_binexp(arb_t y, const arb_t b, mpz_srcptr e, slong prec) +{ + slong i, wp, bits; + + if (e->_mp_size < 0) + { + __mpz_struct f = { 1, 0, NULL }; + f._mp_size = -(e->_mp_size); + f._mp_d = e->_mp_d; + + if (arb_is_exact(b)) + { + _arb_pow_mpz_binexp(y, b, &f, prec + 2); + arb_inv(y, y, prec); + } + else + { + arb_inv(y, b, prec + mpz_sizeinbase(e, 2) + 2); + _arb_pow_mpz_binexp(y, y, &f, prec); + } + + return; + } + + if (y == b) + { + arb_t t; + arb_init(t); + arb_set(t, b); + _arb_pow_mpz_binexp(y, t, e, prec); + arb_clear(t); + return; + } + + arb_set(y, b); + + bits = mpz_sizeinbase(e, 2); + wp = ARF_PREC_ADD(prec, bits); + + for (i = bits - 2; i >= 0; i--) + { + arb_sqr(y, y, wp); + if (mpz_tstbit(e, i)) + arb_mul(y, y, b, wp); + } +} + +void +arb_pow_ui_binexp(arb_t y, const arb_t b, ulong e, slong prec) +{ + slong i, wp, bits; + + if (e <= 2) + { + if (e == 0) + arb_one(y); + else if (e == 1) + arb_set_round(y, b, prec); + else + arb_sqr(y, b, prec); + return; + } + + if (y == b) + { + arb_t t; + arb_init(t); + arb_set(t, b); + arb_pow_ui_binexp(y, t, e, prec); + arb_clear(t); + return; + } + + arb_set(y, b); + + bits = FLINT_BIT_COUNT(e); + wp = ARF_PREC_ADD(prec, bits); + + for (i = bits - 2; i >= 0; i--) + { + arb_sqr(y, y, wp); + if (((WORD(1) << i) & e) != 0) + arb_mul(y, y, b, wp); + } +} diff --git a/arb/pow_fmpz_binexp.c b/arb/pow_fmpz_binexp.c deleted file mode 100644 index 4e4f66afd..000000000 --- a/arb/pow_fmpz_binexp.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - Copyright (C) 2012, 2013 Fredrik Johansson - - This file is part of Arb. - - Arb is free software: you can redistribute it and/or modify it under - the terms of the GNU Lesser General Public License (LGPL) as published - by the Free Software Foundation; either version 2.1 of the License, or - (at your option) any later version. See . -*/ - -#include "arb.h" - -void -arb_pow_fmpz_binexp(arb_t y, const arb_t b, const fmpz_t e, slong prec) -{ - slong i, wp, bits; - - if (-WORD(2) <= *e && *e <= WORD(2)) - { - if (*e == WORD(0)) - arb_one(y); - else if (*e == WORD(1)) - arb_set_round(y, b, prec); - else if (*e == -WORD(1)) - arb_inv(y, b, prec); - else if (*e == WORD(2)) - arb_mul(y, b, b, prec); - else - { - arb_inv(y, b, prec); - arb_mul(y, y, y, prec); - } - return; - } - - if (fmpz_sgn(e) < 0) - { - fmpz_t f; - fmpz_init(f); - fmpz_neg(f, e); - - if (arb_is_exact(b)) - { - arb_pow_fmpz_binexp(y, b, f, prec + 2); - arb_inv(y, y, prec); - } - else - { - arb_inv(y, b, prec + fmpz_bits(e) + 2); - arb_pow_fmpz_binexp(y, y, f, prec); - } - - fmpz_clear(f); - return; - } - - if (y == b) - { - arb_t t; - arb_init(t); - arb_set(t, b); - arb_pow_fmpz_binexp(y, t, e, prec); - arb_clear(t); - return; - } - - arb_set(y, b); - - bits = fmpz_bits(e); - wp = ARF_PREC_ADD(prec, bits); - - for (i = bits - 2; i >= 0; i--) - { - arb_mul(y, y, y, wp); - if (fmpz_tstbit(e, i)) - arb_mul(y, y, b, wp); - } -} - diff --git a/arb/pow_si.c b/arb/pow_si.c new file mode 100644 index 000000000..e626a1bc8 --- /dev/null +++ b/arb/pow_si.c @@ -0,0 +1,37 @@ +/* + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arb.h" + +void +arb_pow_si(arb_t y, const arb_t b, slong e, slong prec) +{ + if (e >= 0) + arb_pow_ui_binexp(y, b, e, prec); + else if (e == -1) + arb_inv(y, b, prec); + else if (e == -2) + { + arb_inv(y, b, prec); + arb_sqr(y, y, prec); + } + else if (arb_is_exact(b)) + { + arb_pow_ui_binexp(y, b, -e, prec + 2); + arb_inv(y, y, prec); + } + else + { + e = -e; + arb_inv(y, b, prec + FLINT_BIT_COUNT(e) + 2); + arb_pow_ui_binexp(y, y, e, prec); + } +} diff --git a/arb/sqr.c b/arb/sqr.c new file mode 100644 index 000000000..c6900379b --- /dev/null +++ b/arb/sqr.c @@ -0,0 +1,67 @@ +/* + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arb.h" + +void +arb_sqr(arb_t res, const arb_t x, slong prec) +{ + mag_t resr, xm; + int inexact; + + if (arb_is_exact(x)) + { + inexact = arf_sqr_rnd_down(arb_midref(res), arb_midref(x), prec); + + if (inexact) + arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec); + else + mag_zero(arb_radref(res)); + } + else if (ARB_IS_LAGOM(x) && ARB_IS_LAGOM(res)) + { + mag_fast_init_set_arf(xm, arb_midref(x)); + + mag_init(resr); + mag_fast_mul(resr, xm, arb_radref(x)); + if (resr->exp < COEFF_MAX && resr->exp >= COEFF_MIN) + (resr->exp)++; + else + fmpz_add_ui(&(resr->exp), &(resr->exp), 1); + mag_fast_addmul(resr, arb_radref(x), arb_radref(x)); + + inexact = arf_sqr_rnd_down(arb_midref(res), arb_midref(x), prec); + + if (inexact) + arf_mag_fast_add_ulp(resr, resr, arb_midref(res), prec); + + *arb_radref(res) = *resr; + } + else + { + mag_init_set_arf(xm, arb_midref(x)); + + mag_init(resr); + mag_mul(resr, xm, arb_radref(x)); + fmpz_add_ui(&(resr->exp), &(resr->exp), 1); + mag_addmul(resr, arb_radref(x), arb_radref(x)); + + inexact = arf_sqr_rnd_down(arb_midref(res), arb_midref(x), prec); + + if (inexact) + arf_mag_add_ulp(arb_radref(res), resr, arb_midref(res), prec); + else + mag_swap(arb_radref(res), resr); + + mag_clear(xm); + mag_clear(resr); + } +} diff --git a/arb/test/t-sqr.c b/arb/test/t-sqr.c new file mode 100644 index 000000000..2c4fb8292 --- /dev/null +++ b/arb/test/t-sqr.c @@ -0,0 +1,111 @@ +/* + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arb.h" + +int +mag_close(const mag_t am, const mag_t bm) +{ + arf_t t, a, b; + int res1, res2; + + arf_init(t); + arf_init(a); + arf_init(b); + + arf_set_mag(a, am); + arf_set_mag(b, bm); + + arf_mul_ui(t, b, 257, MAG_BITS, ARF_RND_UP); + arf_mul_2exp_si(t, t, -8); + res1 = arf_cmp(a, t) <= 0; + + arf_mul_ui(t, a, 257, MAG_BITS, ARF_RND_UP); + arf_mul_2exp_si(t, t, -8); + res2 = arf_cmp(b, t) <= 0; + + arf_clear(t); + arf_clear(a); + arf_clear(b); + + return res1 && res2; +} + +int main() +{ + slong iter, iter2; + flint_rand_t state; + + flint_printf("sqr...."); + fflush(stdout); + + flint_randinit(state); + + for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++) + { + arb_t x, z, v; + slong prec; + + arb_init(x); + arb_init(z); + arb_init(v); + + for (iter2 = 0; iter2 < 100; iter2++) + { + arb_randtest_special(x, state, n_randint(state,2) ? 2000 : 200, 200); + + prec = 2 + n_randint(state, 2000); + + switch (n_randint(state, 2)) + { + case 0: + arb_mul(z, x, x, prec); + arb_sqr(v, x, prec); + + if (!arf_equal(arb_midref(z), arb_midref(v)) + || !mag_close(arb_radref(z), arb_radref(v))) + { + flint_printf("FAIL!\n"); + flint_printf("x = "); arb_print(x); flint_printf("\n\n"); + flint_printf("z = "); arb_print(z); flint_printf("\n\n"); + flint_printf("v = "); arb_print(v); flint_printf("\n\n"); + flint_abort(); + } + break; + + default: + arb_set(v, x); + arb_mul(z, x, x, prec); + arb_sqr(v, v, prec); + + if (!arf_equal(arb_midref(v), arb_midref(z)) + || !mag_close(arb_radref(v), arb_radref(z))) + { + flint_printf("FAIL (aliasing)!\n"); + flint_printf("x = "); arb_print(x); flint_printf("\n\n"); + flint_printf("v = "); arb_print(v); flint_printf("\n\n"); + flint_printf("z = "); arb_print(z); flint_printf("\n\n"); + flint_abort(); + } + break; + } + } + + arb_clear(x); + arb_clear(z); + arb_clear(v); + } + + flint_randclear(state); + flint_cleanup(); + flint_printf("PASS\n"); + return EXIT_SUCCESS; +} diff --git a/arf.h b/arf.h index 495293ce8..438c3b6c3 100644 --- a/arf.h +++ b/arf.h @@ -823,6 +823,18 @@ void arf_urandom(arf_t x, flint_rand_t state, slong bits, arf_rnd_t rnd); r3 += r2 < t1; \ } while (0) +#define nn_sqr_2(r3, r2, r1, r0, a1, a0) \ + do { \ + mp_limb_t t1, t2, t3; \ + umul_ppmm(r1, r0, a0, a0); \ + umul_ppmm(t2, t1, a1, a0); \ + add_ssaaaa(r2, r1, t2, t1, 0, r1); \ + umul_ppmm(r3, t3, a1, a1); \ + add_ssaaaa(r3, t2, r3, t2, 0, t3); \ + add_ssaaaa(r2, r1, r2, r1, t2, t1); \ + r3 += r2 < t2; \ + } while (0) + #define ARF_MPN_MUL(_z, _x, _xn, _y, _yn) \ if ((_xn) == (_yn)) \ { \ @@ -914,6 +926,12 @@ int arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, slong prec); ? arf_mul_rnd_down(z, x, y, prec) \ : arf_mul_rnd_any(z, x, y, prec, rnd)) +int arf_sqr_via_mpfr(arf_ptr res, arf_srcptr x, slong prec, arf_rnd_t rnd); + +int arf_sqr_rnd_down(arf_ptr res, arf_srcptr x, slong prec); + +void arf_sqr_special(arf_t res, const arf_t x); + ARF_INLINE int arf_neg_mul(arf_t z, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd) { diff --git a/arf/mul_rnd_down.c b/arf/mul_rnd_down.c index 731a1d545..2f9b4b2a0 100644 --- a/arf/mul_rnd_down.c +++ b/arf/mul_rnd_down.c @@ -83,7 +83,7 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, slong prec) { ret = 0; } - else /* prec < FLINT_BITS < 2 * FLINT_BITS */ + else /* FLINT_BITS < prec < 2 * FLINT_BITS */ { ret = MASK_LIMB(lo, 2 * FLINT_BITS - prec) != lo; lo = MASK_LIMB(lo, 2 * FLINT_BITS - prec); @@ -183,7 +183,7 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, slong prec) } else { - mp_size_t zn, alloc; + mp_size_t alloc; mp_srcptr xptr, yptr; mp_ptr tmp; ARF_MUL_TMP_DECL diff --git a/arf/sqr_rnd_down.c b/arf/sqr_rnd_down.c new file mode 100644 index 000000000..1128f34f7 --- /dev/null +++ b/arf/sqr_rnd_down.c @@ -0,0 +1,181 @@ +/* + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arf.h" + +int +arf_sqr_rnd_down(arf_ptr res, arf_srcptr x, slong prec) +{ + mp_size_t xn, resn; + mp_limb_t hi, lo; + slong expfix; + int ret, fix; + mp_ptr resptr; + + xn = ARF_XSIZE(x); + xn >>= 1; + + /* Either operand is a special value. */ + if (xn == 0) + { + arf_sqr_special(res, x); + return 0; + } + + if (xn == 1) + { + lo = ARF_NOPTR_D(x)[0]; + + umul_ppmm(hi, lo, lo, lo); + + /* Shift so that the top bit is set (branch free version). */ + fix = !(hi >> (FLINT_BITS - 1)); + hi = (hi << fix) | ((lo >> (FLINT_BITS - 1)) & fix); + lo = (lo << fix); + + ARF_DEMOTE(res); + + if (lo == 0) + { + resn = 1; + + if (prec >= FLINT_BITS) + { + lo = hi; + ret = 0; + } + else + { + lo = MASK_LIMB(hi, FLINT_BITS - prec); + ret = (lo != hi); + } + } + else + { + resn = 2; + + if (prec <= FLINT_BITS) /* Must be inexact. */ + { + lo = MASK_LIMB(hi, FLINT_BITS - prec); + resn = ret = 1; + } + else if (prec >= 2 * FLINT_BITS) /* Must be exact. */ + { + ret = 0; + } + else /* FLINT_BITS < prec < 2 * FLINT_BITS */ + { + ret = MASK_LIMB(lo, 2 * FLINT_BITS - prec) != lo; + lo = MASK_LIMB(lo, 2 * FLINT_BITS - prec); + + if (lo == 0) + { + resn = 1; + lo = hi; + } + } + } + + _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), -fix); + ARF_XSIZE(res) = ARF_MAKE_XSIZE(resn, 0); + + resptr = ARF_NOPTR_D(res); + resptr[0] = lo; + resptr[1] = hi; + + return ret; + } + else if (xn == 2) + { + mp_limb_t zz[4]; + mp_limb_t x1, x0; + + x0 = ARF_NOPTR_D(x)[0]; + x1 = ARF_NOPTR_D(x)[1]; + + nn_sqr_2(zz[3], zz[2], zz[1], zz[0], x1, x0); + + /* Likely case, must be inexact */ + if (prec <= 2 * FLINT_BITS) + { + ARF_DEMOTE(res); + + fix = !(zz[3] >> (FLINT_BITS - 1)); + zz[3] = (zz[3] << fix) | ((zz[2] >> (FLINT_BITS - 1)) & fix); + zz[2] = (zz[2] << fix) | ((zz[1] >> (FLINT_BITS - 1)) & fix); + + _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), -fix); + + /* Rounding */ + if (prec != 2 * FLINT_BITS) + { + if (prec > FLINT_BITS) + { + zz[2] &= (LIMB_ONES << (2 * FLINT_BITS - prec)); + } + else if (prec == FLINT_BITS) + { + zz[2] = 0; + } + else + { + zz[3] &= (LIMB_ONES << (FLINT_BITS - prec)); + zz[2] = 0; + } + } + + if (zz[2] == 0) + { + ARF_XSIZE(res) = ARF_MAKE_XSIZE(1, 0); + ARF_NOPTR_D(res)[0] = zz[3]; + } + else + { + ARF_XSIZE(res) = ARF_MAKE_XSIZE(2, 0); + ARF_NOPTR_D(res)[0] = zz[2]; + ARF_NOPTR_D(res)[1] = zz[3]; + } + + return 1; + } + + resn = 2 * xn; + ret = _arf_set_round_mpn(res, &expfix, zz, resn, 0, prec, ARF_RND_DOWN); + _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), expfix); + return ret; + } + else if (xn > MUL_MPFR_MIN_LIMBS && prec != ARF_PREC_EXACT + && xn > 0.675 * prec / FLINT_BITS + && xn < MUL_MPFR_MAX_LIMBS) /* FIXME: proper cutoffs */ + { + return arf_sqr_via_mpfr(res, x, prec, ARF_RND_DOWN); + } + else + { + mp_size_t alloc; + mp_srcptr xptr; + mp_ptr tmp; + ARF_MUL_TMP_DECL + + ARF_GET_MPN_READONLY(xptr, xn, x); + + alloc = resn = 2 * xn; + ARF_MUL_TMP_ALLOC(tmp, alloc) + + mpn_sqr(tmp, xptr, xn); + + ret = _arf_set_round_mpn(res, &expfix, tmp, resn, 0, prec, ARF_RND_DOWN); + _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), expfix); + ARF_MUL_TMP_FREE(tmp, alloc) + + return ret; + } +} diff --git a/arb/pow_fmpz.c b/arf/sqr_special.c similarity index 51% rename from arb/pow_fmpz.c rename to arf/sqr_special.c index dc470b274..e6e1b154c 100644 --- a/arb/pow_fmpz.c +++ b/arf/sqr_special.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2012, 2013 Fredrik Johansson + Copyright (C) 2021 Albin Ahlbäck This file is part of Arb. @@ -9,19 +9,15 @@ (at your option) any later version. See . */ -#include "arb.h" +#include "arf.h" void -arb_pow_fmpz(arb_t y, const arb_t b, const fmpz_t e, slong prec) +arf_sqr_special(arf_t res, const arf_t x) { - arb_pow_fmpz_binexp(y, b, e, prec); -} - -void -arb_pow_ui(arb_t y, const arb_t b, ulong e, slong prec) -{ - fmpz_t f; - fmpz_init_set_ui(f, e); - arb_pow_fmpz(y, b, f, prec); - fmpz_clear(f); + if (arf_is_zero(x)) + arf_zero(res); + else if (arf_is_nan(x)) + arf_nan(res); + else + arf_pos_inf(res); } diff --git a/arf/sqr_via_mpfr.c b/arf/sqr_via_mpfr.c new file mode 100644 index 000000000..4e3d6697a --- /dev/null +++ b/arf/sqr_via_mpfr.c @@ -0,0 +1,63 @@ +/* + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arf.h" + +int +arf_sqr_via_mpfr(arf_ptr res, arf_srcptr x, slong prec, arf_rnd_t rnd) +{ + mp_size_t xn, resn, val; + mp_srcptr xptr; + mp_ptr tmp, resptr; + mpfr_t xf, resf; + int ret; + ARF_MUL_TMP_DECL + + if (arf_is_special(x)) + { + arf_sqr_special(res, x); + return 0; + } + + ARF_GET_MPN_READONLY(xptr, xn, x); + + prec = FLINT_MIN(xn * 2 * FLINT_BITS, prec); + resn = (prec + FLINT_BITS - 1) / FLINT_BITS; + + ARF_MUL_TMP_ALLOC(tmp, resn) + + resf->_mpfr_d = tmp; + resf->_mpfr_prec = prec; + resf->_mpfr_sign = 1; + resf->_mpfr_exp = 0; + + xf->_mpfr_d = (mp_ptr) xptr; + xf->_mpfr_prec = xn * FLINT_BITS; + xf->_mpfr_sign = 1; /* Sign does not matter since we are squaring */ + xf->_mpfr_exp = 0; + + ret = mpfr_sqr(resf, xf, arf_rnd_to_mpfr(rnd)); + + ret = (ret != 0); + + _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), resf->_mpfr_exp); + + val = 0; + while (tmp[val] == 0) + val++; + + ARF_GET_MPN_WRITE(resptr, resn - val, res); + flint_mpn_copyi(resptr, tmp + val, resn - val); + + ARF_MUL_TMP_FREE(tmp, resn) + + return ret; +} diff --git a/arf/test/t-nn_sqr_2.c b/arf/test/t-nn_sqr_2.c new file mode 100644 index 000000000..e0654eac8 --- /dev/null +++ b/arf/test/t-nn_sqr_2.c @@ -0,0 +1,45 @@ +/* + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arf.h" + +int main() +{ + slong ix, result; + flint_rand_t state; + + flint_printf("nn_sqr_2...."); + fflush(stdout); + + flint_randinit(state); + + for (ix = 0; ix < 10000 * arb_test_multiplier(); ix++) + { + ulong r0, r1, r2, r3, s0, s1, s2, s3, a0, a1; + + a0 = n_randtest(state); + a1 = n_randtest(state); + + nn_mul_2x2(r3, r2, r1, r0, a1, a0, a1, a0); + nn_sqr_2(s3, s2, s1, s0, a1, a0); + + result = (s3 == r3 && s2 == r2 && s1 == r1 && s0 == r0); + if (!result) + { + flint_printf("FAIL!\n"); + flint_abort(); + } + } + + flint_randclear(state); + flint_printf("PASS\n"); + return EXIT_SUCCESS; +} diff --git a/arf/test/t-sqr_rnd_down.c b/arf/test/t-sqr_rnd_down.c new file mode 100644 index 000000000..f62aca379 --- /dev/null +++ b/arf/test/t-sqr_rnd_down.c @@ -0,0 +1,65 @@ +/* + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arf.h" + +int main() +{ + slong iter, iter2; + flint_rand_t state; + + flint_printf("sqr_rnd_down...."); + fflush(stdout); + + flint_randinit(state); + + for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++) + { + arf_t x, z, v; + slong prec, r1, r2; + arf_rnd_t rnd; + + arf_init(x); + arf_init(z); + arf_init(v); + + for (iter2 = 0; iter2 < 100; iter2++) + { + arf_randtest_special(x, state, 2000, 100); + prec = 2 + n_randint(state, 2000); + + if (n_randint(state, 50) == 0) + prec = ARF_PREC_EXACT; + + r1 = arf_mul_rnd_down(z, x, x, prec); + r2 = arf_sqr_rnd_down(v, x, prec); + if (!arf_equal(z, v) || r1 != r2) + { + flint_printf("FAIL!\n"); + flint_printf("prec = %wd, rnd = %d\n\n", prec, rnd); + flint_printf("x = "); arf_print(x); flint_printf("\n\n"); + flint_printf("z = "); arf_print(z); flint_printf("\n\n"); + flint_printf("v = "); arf_print(v); flint_printf("\n\n"); + flint_printf("r1 = %wd, r2 = %wd\n", r1, r2); + flint_abort(); + } + } + + arf_clear(x); + arf_clear(z); + arf_clear(v); + } + + flint_randclear(state); + flint_cleanup(); + flint_printf("PASS\n"); + return EXIT_SUCCESS; +} diff --git a/arf/test/t-sqr_via_mpfr.c b/arf/test/t-sqr_via_mpfr.c new file mode 100644 index 000000000..94b924b42 --- /dev/null +++ b/arf/test/t-sqr_via_mpfr.c @@ -0,0 +1,94 @@ +/* + Copyright (C) 2021 Albin Ahlbäck + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arf.h" + +int main() +{ + slong iter, iter2; + flint_rand_t state; + + flint_printf("sqr_via_mpfr...."); + fflush(stdout); + + flint_randinit(state); + + for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++) + { + arf_t x, z, v; + slong prec, r1, r2; + arf_rnd_t rnd; + + arf_init(x); + arf_init(z); + arf_init(v); + + for (iter2 = 0; iter2 < 100; iter2++) + { + arf_randtest_special(x, state, 2000, 100); + prec = 2 + n_randint(state, 2000); + + if (n_randint(state, 50) == 0) + prec = ARF_PREC_EXACT; + + switch (n_randint(state, 5)) + { + case 0: rnd = ARF_RND_DOWN; break; + case 1: rnd = ARF_RND_UP; break; + case 2: rnd = ARF_RND_FLOOR; break; + case 3: rnd = ARF_RND_CEIL; break; + default: rnd = ARF_RND_NEAR; break; + } + + switch (n_randint(state, 2)) + { + case 0: + r1 = arf_mul_via_mpfr(z, x, x, prec, rnd); + r2 = arf_sqr_via_mpfr(v, x, prec, rnd); + if (!arf_equal(z, v) || r1 != r2) + { + flint_printf("FAIL!\n"); + flint_printf("prec = %wd, rnd = %d\n\n", prec, rnd); + flint_printf("x = "); arf_print(x); flint_printf("\n\n"); + flint_printf("z = "); arf_print(z); flint_printf("\n\n"); + flint_printf("v = "); arf_print(v); flint_printf("\n\n"); + flint_printf("r1 = %wd, r2 = %wd\n", r1, r2); + flint_abort(); + } + break; + + default: + r1 = arf_mul_via_mpfr(v, x, x, prec, rnd); + r2 = arf_sqr_via_mpfr(x, x, prec, rnd); + if (!arf_equal(v, x) || r1 != r2) + { + flint_printf("FAIL (aliasing)!\n"); + flint_printf("prec = %wd, rnd = %d\n\n", prec, rnd); + flint_printf("x = "); arf_print(x); flint_printf("\n\n"); + flint_printf("z = "); arf_print(z); flint_printf("\n\n"); + flint_printf("v = "); arf_print(v); flint_printf("\n\n"); + flint_printf("r1 = %wd, r2 = %wd\n", r1, r2); + flint_abort(); + } + break; + } + } + + arf_clear(x); + arf_clear(z); + arf_clear(v); + } + + flint_randclear(state); + flint_cleanup(); + flint_printf("PASS\n"); + return EXIT_SUCCESS; +} diff --git a/doc/source/arb.rst b/doc/source/arb.rst index 70837041e..30ca849f7 100644 --- a/doc/source/arb.rst +++ b/doc/source/arb.rst @@ -989,28 +989,47 @@ Powers and roots Alias for :func:`arb_root_ui`, provided for backwards compatibility. -.. function:: void arb_sqr(arb_t y, const arb_t x, slong prec) +.. function:: void arb_sqr(arb_t res, const arb_t x, slong prec) - Sets *y* to be the square of *x*. + Sets *res* to the square of `x`. + +.. function:: void _arb_pow_mpz_binexp(arb_t y, const arb_t b, mpz_srcptr e, slong prec) + +.. function:: void arb_pow_ui_binexp(arb_t y, const arb_t b, ulong e, slong prec) + + Sets `y = b^e` via binary exponentiation with an initial division if + `e < 0`. Provided that `b` and `e` are small enough and the exponent is + positive, the exact power can be computed by setting the precision to + *ARF_PREC_EXACT*. + + Note that these functions can get slow if the exponent is + extremely large (in such cases :func:`arb_pow` may be superior). + +.. function:: void arb_pow_si(arb_t y, const arb_t b, slong e, slong prec); + + Sets `y = b^e` by wrapping *arb_pow_ui*, doing an initial division if + `e < 0`. .. function:: void arb_pow_fmpz_binexp(arb_t y, const arb_t b, const fmpz_t e, slong prec) + Sets `y = b^e` by wrapping *arb_pow_si* and *_arb_pow_mpz_binexp*. + .. function:: void arb_pow_fmpz(arb_t y, const arb_t b, const fmpz_t e, slong prec) + Wrapper for *arb_pow_fmpz_binexp*. + .. function:: void arb_pow_ui(arb_t y, const arb_t b, ulong e, slong prec) + Wrapper for *arb_pow_ui_binexp*. + .. function:: void arb_ui_pow_ui(arb_t y, ulong b, ulong e, slong prec) .. function:: void arb_si_pow_ui(arb_t y, slong b, ulong e, slong prec) - Sets `y = b^e` using binary exponentiation (with an initial division - if `e < 0`). Provided that *b* and *e* + Sets `y = b^e` using binary exponentiation. Provided that *b* and *e* are small enough and the exponent is positive, the exact power can be computed by setting the precision to *ARF_PREC_EXACT*. - Note that these functions can get slow if the exponent is - extremely large (in such cases :func:`arb_pow` may be superior). - .. function:: void arb_pow_fmpq(arb_t y, const arb_t x, const fmpq_t a, slong prec) Sets `y = b^e`, computed as `y = (b^{1/q})^p` if the denominator of diff --git a/doc/source/arf.rst b/doc/source/arf.rst index 0d8ba4fd3..3d4e4b9ae 100644 --- a/doc/source/arf.rst +++ b/doc/source/arf.rst @@ -111,6 +111,18 @@ Types, macros and constants test code. If in doubt, simply try with some convenient high precision instead of using this special value, and check that the result is exact. +.. macro:: nn_mul_2x1(r2, r1, r0, a1, a0, b0) + + Sets `(r_2, r_1, r_0)` to `(a_1, a_0)` multiplied by `b_0`. + +.. macro:: nn_mul_2x2(r3, r2, r1, r0, a1, a0, b1, b0) + + Sets `(r_2, r_1, r_0)` to `(a_1, a_0)` multiplied by `(b_1, b_0)`. + +.. macro:: nn_sqr_2(r3, r2, r1, r0, a1, a0) + + Sets `(r_2, r_1, r_0)` to the square of `(a_1, a_0)`. + Memory management ------------------------------------------------------------------------------- @@ -553,7 +565,8 @@ Addition and multiplication .. function:: int arf_neg_round(arf_t res, const arf_t x, slong prec, arf_rnd_t rnd) - Sets *res* to `-x`. + Sets *res* to `-x`. Returns 0 if this operation was made exactly and 1 if + truncation occurred. .. function:: int arf_add(arf_t res, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd) @@ -563,11 +576,13 @@ Addition and multiplication .. function:: int arf_add_fmpz(arf_t res, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd) - Sets *res* to `x + y`. + Sets *res* to `x + y`. Returns 0 if this operation was made exactly and 1 + if truncation occurred. .. function:: int arf_add_fmpz_2exp(arf_t res, const arf_t x, const fmpz_t y, const fmpz_t e, slong prec, arf_rnd_t rnd) - Sets *res* to `x + y 2^e`. + Sets *res* to `x + y 2^e`. Returns 0 if this operation was made exactly and + 1 if truncation occurred. .. function:: int arf_sub(arf_t res, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd) @@ -577,7 +592,8 @@ Addition and multiplication .. function:: int arf_sub_fmpz(arf_t res, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd) - Sets *res* to `x - y`. + Sets *res* to `x - y`. Returns 0 if this operation was made exactly and 1 + if truncation occurred. .. function:: void arf_mul_2exp_si(arf_t res, const arf_t x, slong e) @@ -595,7 +611,31 @@ Addition and multiplication .. function:: int arf_mul_fmpz(arf_t res, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd) - Sets *res* to `x \cdot y`. + Sets *res* to `x \cdot y`. Returns 0 if this operation was made exactly and + 1 if truncation occurred. + +.. function:: void arf_mul_special(arf_t z, const arf_t x, const arf_t y) + + Sets *res* to the special value of `x \cdot y`. + + Note that it is assumed that at least one of `x` and `y` has special + values. + +.. function:: int arf_sqr_via_mpfr(arf_ptr res, arf_srcptr x, slong prec, arf_rnd_t rnd) + + Sets *res* to `x^2` by calling MPFR. Returns 0 if this operation was made + exactly and 1 if truncation occurred. + +.. function:: int arf_sqr_rnd_down(arf_ptr res, arf_srcptr x, slong prec) + + Sets *res* to `x^2`. If the opertation was made exactly, it returns 1. + Otherwise it rounds downwards and returns 1. + +.. function:: void arf_sqr_special(arf_t res, const arf_t x) + + Sets *res* to the special value of `x^2`. + + Note that it is assumed that `x` has a special value. .. function:: int arf_addmul(arf_t z, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd) @@ -607,7 +647,8 @@ Addition and multiplication .. function:: int arf_addmul_fmpz(arf_t z, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd) - Performs a fused multiply-add `z = z + x \cdot y`, updating *z* in-place. + Adds `x \cdot y` to `z`, updating `z` in-place. Returns 0 if this operation + was made exactly and 1 if truncation occurred. .. function:: int arf_submul(arf_t z, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd) @@ -619,16 +660,21 @@ Addition and multiplication .. function:: int arf_submul_fmpz(arf_t z, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd) - Performs a fused multiply-subtract `z = z - x \cdot y`, updating *z* in-place. + Performs a fused multiply-subtract `z = z - x \cdot y`, updating *z* + in-place. Returns 0 if this operation was made exactly and 1 if truncation + occurred. .. function:: int arf_fma(arf_t res, const arf_t x, const arf_t y, const arf_t z, slong prec, arf_rnd_t rnd) Sets *res* to `x \cdot y + z`. This is equivalent to an *addmul* except - that *res* and *z* can be separate variables. + that *res* and *z* can be separate variables. Returns 0 if this operation + was made exactly and 1 if truncation occurred. .. function:: int arf_sosq(arf_t res, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd) - Sets *res* to `x^2 + y^2`, rounded to *prec* bits in the direction specified by *rnd*. + Sets *res* to `x^2 + y^2`, rounded to *prec* bits in the direction + specified by *rnd*. Returns 0 if this operation was made exactly and 1 if + truncation occurred. Summation ------------------------------------------------------------------------------- @@ -662,8 +708,9 @@ Division .. function:: int arf_fmpz_div_fmpz(arf_t res, const fmpz_t x, const fmpz_t y, slong prec, arf_rnd_t rnd) - Sets *res* to `x / y`, rounded to *prec* bits in the direction specified by *rnd*, - returning nonzero iff the operation is inexact. The result is NaN if *y* is zero. + Sets *res* to `x / y`, rounded to *prec* bits in the direction specified by + *rnd*, returning nonzero iff the operation is inexact. The result is NaN if + *y* is zero. Square roots ------------------------------------------------------------------------------- diff --git a/mag.h b/mag.h index 00eb44fa9..917798677 100644 --- a/mag.h +++ b/mag.h @@ -112,6 +112,23 @@ _fmpz_sub2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, slong c) } } +static __inline__ void +_fmpz_2times_fast(fmpz_t res, const fmpz_t x, slong c) +{ + slong cx = *x; + + if (!COEFF_IS_MPZ(*res) && (cx > COEFF_MIN / 2 && cx < COEFF_MAX / 2)) + { + *res = 2 * cx + c; + } + else + { + fmpz_mul_2exp(res, x, 1); + if (c != 0) + fmpz_add_si(res, res, c); + } +} + #define MAG_EXP_POS_INF (COEFF_MIN+1)