diff --git a/arb.h b/arb.h
index 6b1008510..1fa01aaa6 100644
--- a/arb.h
+++ b/arb.h
@@ -413,6 +413,8 @@ void arb_mul_si(arb_t z, const arb_t x, slong y, slong prec);
void arb_mul_ui(arb_t z, const arb_t x, ulong y, slong prec);
void arb_mul_fmpz(arb_t z, const arb_t x, const fmpz_t y, slong prec);
+void arb_sqr(arb_t res, const arb_t x, slong prec);
+
void arb_addmul(arb_t z, const arb_t x, const arb_t y, slong prec);
void arb_addmul_arf(arb_t z, const arb_t x, const arf_t y, slong prec);
void arb_addmul_si(arb_t z, const arb_t x, slong y, slong prec);
@@ -484,9 +486,27 @@ void arb_rsqrt(arb_t z, const arb_t x, slong prec);
void arb_rsqrt_ui(arb_t z, ulong x, slong prec);
void arb_sqrt1pm1(arb_t r, const arb_t z, slong prec);
-void arb_pow_fmpz_binexp(arb_t y, const arb_t b, const fmpz_t e, slong prec);
-void arb_pow_fmpz(arb_t y, const arb_t b, const fmpz_t e, slong prec);
-void arb_pow_ui(arb_t y, const arb_t b, ulong e, slong prec);
+void _arb_pow_mpz_binexp(arb_t y, const arb_t b, mpz_srcptr e, slong prec);
+void arb_pow_ui_binexp(arb_t y, const arb_t b, ulong e, slong prec);
+void arb_pow_si(arb_t y, const arb_t b, slong e, slong prec);
+ARB_INLINE void
+arb_pow_fmpz_binexp(arb_t y, const arb_t b, const fmpz_t e, slong prec)
+{
+ if (!COEFF_IS_MPZ(*e))
+ arb_pow_si(y, b, *e, prec);
+ else
+ _arb_pow_mpz_binexp(y, b, COEFF_TO_PTR(*e), prec);
+}
+ARB_INLINE void
+arb_pow_fmpz(arb_t y, const arb_t b, const fmpz_t e, slong prec)
+{
+ arb_pow_fmpz_binexp(y, b, e, prec);
+}
+ARB_INLINE void
+arb_pow_ui(arb_t y, const arb_t b, ulong e, slong prec)
+{
+ arb_pow_ui_binexp(y, b, e, prec);
+}
void arb_ui_pow_ui(arb_t y, ulong b, ulong e, slong prec);
void arb_si_pow_ui(arb_t y, slong b, ulong e, slong prec);
void arb_pow_fmpq(arb_t y, const arb_t x, const fmpq_t a, slong prec);
@@ -620,12 +640,6 @@ void arb_primorial_ui(arb_t res, ulong n, slong prec);
void arb_lambertw(arb_t res, const arb_t x, int flags, slong prec);
-ARB_INLINE void
-arb_sqr(arb_t res, const arb_t val, slong prec)
-{
- arb_mul(res, val, val, prec);
-}
-
#define ARB_DEF_CACHED_CONSTANT(name, comp_func) \
TLS_PREFIX slong name ## _cached_prec = 0; \
TLS_PREFIX arb_t name ## _cached_value; \
diff --git a/arb/pow_binexp.c b/arb/pow_binexp.c
new file mode 100644
index 000000000..9e21ca5d9
--- /dev/null
+++ b/arb/pow_binexp.c
@@ -0,0 +1,100 @@
+/*
+ Copyright (C) 2012, 2013 Fredrik Johansson
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arb.h"
+
+void
+_arb_pow_mpz_binexp(arb_t y, const arb_t b, mpz_srcptr e, slong prec)
+{
+ slong i, wp, bits;
+
+ if (e->_mp_size < 0)
+ {
+ __mpz_struct f = { 1, 0, NULL };
+ f._mp_size = -(e->_mp_size);
+ f._mp_d = e->_mp_d;
+
+ if (arb_is_exact(b))
+ {
+ _arb_pow_mpz_binexp(y, b, &f, prec + 2);
+ arb_inv(y, y, prec);
+ }
+ else
+ {
+ arb_inv(y, b, prec + mpz_sizeinbase(e, 2) + 2);
+ _arb_pow_mpz_binexp(y, y, &f, prec);
+ }
+
+ return;
+ }
+
+ if (y == b)
+ {
+ arb_t t;
+ arb_init(t);
+ arb_set(t, b);
+ _arb_pow_mpz_binexp(y, t, e, prec);
+ arb_clear(t);
+ return;
+ }
+
+ arb_set(y, b);
+
+ bits = mpz_sizeinbase(e, 2);
+ wp = ARF_PREC_ADD(prec, bits);
+
+ for (i = bits - 2; i >= 0; i--)
+ {
+ arb_sqr(y, y, wp);
+ if (mpz_tstbit(e, i))
+ arb_mul(y, y, b, wp);
+ }
+}
+
+void
+arb_pow_ui_binexp(arb_t y, const arb_t b, ulong e, slong prec)
+{
+ slong i, wp, bits;
+
+ if (e <= 2)
+ {
+ if (e == 0)
+ arb_one(y);
+ else if (e == 1)
+ arb_set_round(y, b, prec);
+ else
+ arb_sqr(y, b, prec);
+ return;
+ }
+
+ if (y == b)
+ {
+ arb_t t;
+ arb_init(t);
+ arb_set(t, b);
+ arb_pow_ui_binexp(y, t, e, prec);
+ arb_clear(t);
+ return;
+ }
+
+ arb_set(y, b);
+
+ bits = FLINT_BIT_COUNT(e);
+ wp = ARF_PREC_ADD(prec, bits);
+
+ for (i = bits - 2; i >= 0; i--)
+ {
+ arb_sqr(y, y, wp);
+ if (((WORD(1) << i) & e) != 0)
+ arb_mul(y, y, b, wp);
+ }
+}
diff --git a/arb/pow_fmpz_binexp.c b/arb/pow_fmpz_binexp.c
deleted file mode 100644
index 4e4f66afd..000000000
--- a/arb/pow_fmpz_binexp.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- Copyright (C) 2012, 2013 Fredrik Johansson
-
- This file is part of Arb.
-
- Arb is free software: you can redistribute it and/or modify it under
- the terms of the GNU Lesser General Public License (LGPL) as published
- by the Free Software Foundation; either version 2.1 of the License, or
- (at your option) any later version. See .
-*/
-
-#include "arb.h"
-
-void
-arb_pow_fmpz_binexp(arb_t y, const arb_t b, const fmpz_t e, slong prec)
-{
- slong i, wp, bits;
-
- if (-WORD(2) <= *e && *e <= WORD(2))
- {
- if (*e == WORD(0))
- arb_one(y);
- else if (*e == WORD(1))
- arb_set_round(y, b, prec);
- else if (*e == -WORD(1))
- arb_inv(y, b, prec);
- else if (*e == WORD(2))
- arb_mul(y, b, b, prec);
- else
- {
- arb_inv(y, b, prec);
- arb_mul(y, y, y, prec);
- }
- return;
- }
-
- if (fmpz_sgn(e) < 0)
- {
- fmpz_t f;
- fmpz_init(f);
- fmpz_neg(f, e);
-
- if (arb_is_exact(b))
- {
- arb_pow_fmpz_binexp(y, b, f, prec + 2);
- arb_inv(y, y, prec);
- }
- else
- {
- arb_inv(y, b, prec + fmpz_bits(e) + 2);
- arb_pow_fmpz_binexp(y, y, f, prec);
- }
-
- fmpz_clear(f);
- return;
- }
-
- if (y == b)
- {
- arb_t t;
- arb_init(t);
- arb_set(t, b);
- arb_pow_fmpz_binexp(y, t, e, prec);
- arb_clear(t);
- return;
- }
-
- arb_set(y, b);
-
- bits = fmpz_bits(e);
- wp = ARF_PREC_ADD(prec, bits);
-
- for (i = bits - 2; i >= 0; i--)
- {
- arb_mul(y, y, y, wp);
- if (fmpz_tstbit(e, i))
- arb_mul(y, y, b, wp);
- }
-}
-
diff --git a/arb/pow_si.c b/arb/pow_si.c
new file mode 100644
index 000000000..e626a1bc8
--- /dev/null
+++ b/arb/pow_si.c
@@ -0,0 +1,37 @@
+/*
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arb.h"
+
+void
+arb_pow_si(arb_t y, const arb_t b, slong e, slong prec)
+{
+ if (e >= 0)
+ arb_pow_ui_binexp(y, b, e, prec);
+ else if (e == -1)
+ arb_inv(y, b, prec);
+ else if (e == -2)
+ {
+ arb_inv(y, b, prec);
+ arb_sqr(y, y, prec);
+ }
+ else if (arb_is_exact(b))
+ {
+ arb_pow_ui_binexp(y, b, -e, prec + 2);
+ arb_inv(y, y, prec);
+ }
+ else
+ {
+ e = -e;
+ arb_inv(y, b, prec + FLINT_BIT_COUNT(e) + 2);
+ arb_pow_ui_binexp(y, y, e, prec);
+ }
+}
diff --git a/arb/sqr.c b/arb/sqr.c
new file mode 100644
index 000000000..c6900379b
--- /dev/null
+++ b/arb/sqr.c
@@ -0,0 +1,67 @@
+/*
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arb.h"
+
+void
+arb_sqr(arb_t res, const arb_t x, slong prec)
+{
+ mag_t resr, xm;
+ int inexact;
+
+ if (arb_is_exact(x))
+ {
+ inexact = arf_sqr_rnd_down(arb_midref(res), arb_midref(x), prec);
+
+ if (inexact)
+ arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec);
+ else
+ mag_zero(arb_radref(res));
+ }
+ else if (ARB_IS_LAGOM(x) && ARB_IS_LAGOM(res))
+ {
+ mag_fast_init_set_arf(xm, arb_midref(x));
+
+ mag_init(resr);
+ mag_fast_mul(resr, xm, arb_radref(x));
+ if (resr->exp < COEFF_MAX && resr->exp >= COEFF_MIN)
+ (resr->exp)++;
+ else
+ fmpz_add_ui(&(resr->exp), &(resr->exp), 1);
+ mag_fast_addmul(resr, arb_radref(x), arb_radref(x));
+
+ inexact = arf_sqr_rnd_down(arb_midref(res), arb_midref(x), prec);
+
+ if (inexact)
+ arf_mag_fast_add_ulp(resr, resr, arb_midref(res), prec);
+
+ *arb_radref(res) = *resr;
+ }
+ else
+ {
+ mag_init_set_arf(xm, arb_midref(x));
+
+ mag_init(resr);
+ mag_mul(resr, xm, arb_radref(x));
+ fmpz_add_ui(&(resr->exp), &(resr->exp), 1);
+ mag_addmul(resr, arb_radref(x), arb_radref(x));
+
+ inexact = arf_sqr_rnd_down(arb_midref(res), arb_midref(x), prec);
+
+ if (inexact)
+ arf_mag_add_ulp(arb_radref(res), resr, arb_midref(res), prec);
+ else
+ mag_swap(arb_radref(res), resr);
+
+ mag_clear(xm);
+ mag_clear(resr);
+ }
+}
diff --git a/arb/test/t-sqr.c b/arb/test/t-sqr.c
new file mode 100644
index 000000000..2c4fb8292
--- /dev/null
+++ b/arb/test/t-sqr.c
@@ -0,0 +1,111 @@
+/*
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arb.h"
+
+int
+mag_close(const mag_t am, const mag_t bm)
+{
+ arf_t t, a, b;
+ int res1, res2;
+
+ arf_init(t);
+ arf_init(a);
+ arf_init(b);
+
+ arf_set_mag(a, am);
+ arf_set_mag(b, bm);
+
+ arf_mul_ui(t, b, 257, MAG_BITS, ARF_RND_UP);
+ arf_mul_2exp_si(t, t, -8);
+ res1 = arf_cmp(a, t) <= 0;
+
+ arf_mul_ui(t, a, 257, MAG_BITS, ARF_RND_UP);
+ arf_mul_2exp_si(t, t, -8);
+ res2 = arf_cmp(b, t) <= 0;
+
+ arf_clear(t);
+ arf_clear(a);
+ arf_clear(b);
+
+ return res1 && res2;
+}
+
+int main()
+{
+ slong iter, iter2;
+ flint_rand_t state;
+
+ flint_printf("sqr....");
+ fflush(stdout);
+
+ flint_randinit(state);
+
+ for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
+ {
+ arb_t x, z, v;
+ slong prec;
+
+ arb_init(x);
+ arb_init(z);
+ arb_init(v);
+
+ for (iter2 = 0; iter2 < 100; iter2++)
+ {
+ arb_randtest_special(x, state, n_randint(state,2) ? 2000 : 200, 200);
+
+ prec = 2 + n_randint(state, 2000);
+
+ switch (n_randint(state, 2))
+ {
+ case 0:
+ arb_mul(z, x, x, prec);
+ arb_sqr(v, x, prec);
+
+ if (!arf_equal(arb_midref(z), arb_midref(v))
+ || !mag_close(arb_radref(z), arb_radref(v)))
+ {
+ flint_printf("FAIL!\n");
+ flint_printf("x = "); arb_print(x); flint_printf("\n\n");
+ flint_printf("z = "); arb_print(z); flint_printf("\n\n");
+ flint_printf("v = "); arb_print(v); flint_printf("\n\n");
+ flint_abort();
+ }
+ break;
+
+ default:
+ arb_set(v, x);
+ arb_mul(z, x, x, prec);
+ arb_sqr(v, v, prec);
+
+ if (!arf_equal(arb_midref(v), arb_midref(z))
+ || !mag_close(arb_radref(v), arb_radref(z)))
+ {
+ flint_printf("FAIL (aliasing)!\n");
+ flint_printf("x = "); arb_print(x); flint_printf("\n\n");
+ flint_printf("v = "); arb_print(v); flint_printf("\n\n");
+ flint_printf("z = "); arb_print(z); flint_printf("\n\n");
+ flint_abort();
+ }
+ break;
+ }
+ }
+
+ arb_clear(x);
+ arb_clear(z);
+ arb_clear(v);
+ }
+
+ flint_randclear(state);
+ flint_cleanup();
+ flint_printf("PASS\n");
+ return EXIT_SUCCESS;
+}
diff --git a/arf.h b/arf.h
index 495293ce8..438c3b6c3 100644
--- a/arf.h
+++ b/arf.h
@@ -823,6 +823,18 @@ void arf_urandom(arf_t x, flint_rand_t state, slong bits, arf_rnd_t rnd);
r3 += r2 < t1; \
} while (0)
+#define nn_sqr_2(r3, r2, r1, r0, a1, a0) \
+ do { \
+ mp_limb_t t1, t2, t3; \
+ umul_ppmm(r1, r0, a0, a0); \
+ umul_ppmm(t2, t1, a1, a0); \
+ add_ssaaaa(r2, r1, t2, t1, 0, r1); \
+ umul_ppmm(r3, t3, a1, a1); \
+ add_ssaaaa(r3, t2, r3, t2, 0, t3); \
+ add_ssaaaa(r2, r1, r2, r1, t2, t1); \
+ r3 += r2 < t2; \
+ } while (0)
+
#define ARF_MPN_MUL(_z, _x, _xn, _y, _yn) \
if ((_xn) == (_yn)) \
{ \
@@ -914,6 +926,12 @@ int arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, slong prec);
? arf_mul_rnd_down(z, x, y, prec) \
: arf_mul_rnd_any(z, x, y, prec, rnd))
+int arf_sqr_via_mpfr(arf_ptr res, arf_srcptr x, slong prec, arf_rnd_t rnd);
+
+int arf_sqr_rnd_down(arf_ptr res, arf_srcptr x, slong prec);
+
+void arf_sqr_special(arf_t res, const arf_t x);
+
ARF_INLINE int
arf_neg_mul(arf_t z, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd)
{
diff --git a/arf/mul_rnd_down.c b/arf/mul_rnd_down.c
index 731a1d545..2f9b4b2a0 100644
--- a/arf/mul_rnd_down.c
+++ b/arf/mul_rnd_down.c
@@ -83,7 +83,7 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, slong prec)
{
ret = 0;
}
- else /* prec < FLINT_BITS < 2 * FLINT_BITS */
+ else /* FLINT_BITS < prec < 2 * FLINT_BITS */
{
ret = MASK_LIMB(lo, 2 * FLINT_BITS - prec) != lo;
lo = MASK_LIMB(lo, 2 * FLINT_BITS - prec);
@@ -183,7 +183,7 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, slong prec)
}
else
{
- mp_size_t zn, alloc;
+ mp_size_t alloc;
mp_srcptr xptr, yptr;
mp_ptr tmp;
ARF_MUL_TMP_DECL
diff --git a/arf/sqr_rnd_down.c b/arf/sqr_rnd_down.c
new file mode 100644
index 000000000..1128f34f7
--- /dev/null
+++ b/arf/sqr_rnd_down.c
@@ -0,0 +1,181 @@
+/*
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arf.h"
+
+int
+arf_sqr_rnd_down(arf_ptr res, arf_srcptr x, slong prec)
+{
+ mp_size_t xn, resn;
+ mp_limb_t hi, lo;
+ slong expfix;
+ int ret, fix;
+ mp_ptr resptr;
+
+ xn = ARF_XSIZE(x);
+ xn >>= 1;
+
+ /* Either operand is a special value. */
+ if (xn == 0)
+ {
+ arf_sqr_special(res, x);
+ return 0;
+ }
+
+ if (xn == 1)
+ {
+ lo = ARF_NOPTR_D(x)[0];
+
+ umul_ppmm(hi, lo, lo, lo);
+
+ /* Shift so that the top bit is set (branch free version). */
+ fix = !(hi >> (FLINT_BITS - 1));
+ hi = (hi << fix) | ((lo >> (FLINT_BITS - 1)) & fix);
+ lo = (lo << fix);
+
+ ARF_DEMOTE(res);
+
+ if (lo == 0)
+ {
+ resn = 1;
+
+ if (prec >= FLINT_BITS)
+ {
+ lo = hi;
+ ret = 0;
+ }
+ else
+ {
+ lo = MASK_LIMB(hi, FLINT_BITS - prec);
+ ret = (lo != hi);
+ }
+ }
+ else
+ {
+ resn = 2;
+
+ if (prec <= FLINT_BITS) /* Must be inexact. */
+ {
+ lo = MASK_LIMB(hi, FLINT_BITS - prec);
+ resn = ret = 1;
+ }
+ else if (prec >= 2 * FLINT_BITS) /* Must be exact. */
+ {
+ ret = 0;
+ }
+ else /* FLINT_BITS < prec < 2 * FLINT_BITS */
+ {
+ ret = MASK_LIMB(lo, 2 * FLINT_BITS - prec) != lo;
+ lo = MASK_LIMB(lo, 2 * FLINT_BITS - prec);
+
+ if (lo == 0)
+ {
+ resn = 1;
+ lo = hi;
+ }
+ }
+ }
+
+ _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), -fix);
+ ARF_XSIZE(res) = ARF_MAKE_XSIZE(resn, 0);
+
+ resptr = ARF_NOPTR_D(res);
+ resptr[0] = lo;
+ resptr[1] = hi;
+
+ return ret;
+ }
+ else if (xn == 2)
+ {
+ mp_limb_t zz[4];
+ mp_limb_t x1, x0;
+
+ x0 = ARF_NOPTR_D(x)[0];
+ x1 = ARF_NOPTR_D(x)[1];
+
+ nn_sqr_2(zz[3], zz[2], zz[1], zz[0], x1, x0);
+
+ /* Likely case, must be inexact */
+ if (prec <= 2 * FLINT_BITS)
+ {
+ ARF_DEMOTE(res);
+
+ fix = !(zz[3] >> (FLINT_BITS - 1));
+ zz[3] = (zz[3] << fix) | ((zz[2] >> (FLINT_BITS - 1)) & fix);
+ zz[2] = (zz[2] << fix) | ((zz[1] >> (FLINT_BITS - 1)) & fix);
+
+ _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), -fix);
+
+ /* Rounding */
+ if (prec != 2 * FLINT_BITS)
+ {
+ if (prec > FLINT_BITS)
+ {
+ zz[2] &= (LIMB_ONES << (2 * FLINT_BITS - prec));
+ }
+ else if (prec == FLINT_BITS)
+ {
+ zz[2] = 0;
+ }
+ else
+ {
+ zz[3] &= (LIMB_ONES << (FLINT_BITS - prec));
+ zz[2] = 0;
+ }
+ }
+
+ if (zz[2] == 0)
+ {
+ ARF_XSIZE(res) = ARF_MAKE_XSIZE(1, 0);
+ ARF_NOPTR_D(res)[0] = zz[3];
+ }
+ else
+ {
+ ARF_XSIZE(res) = ARF_MAKE_XSIZE(2, 0);
+ ARF_NOPTR_D(res)[0] = zz[2];
+ ARF_NOPTR_D(res)[1] = zz[3];
+ }
+
+ return 1;
+ }
+
+ resn = 2 * xn;
+ ret = _arf_set_round_mpn(res, &expfix, zz, resn, 0, prec, ARF_RND_DOWN);
+ _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), expfix);
+ return ret;
+ }
+ else if (xn > MUL_MPFR_MIN_LIMBS && prec != ARF_PREC_EXACT
+ && xn > 0.675 * prec / FLINT_BITS
+ && xn < MUL_MPFR_MAX_LIMBS) /* FIXME: proper cutoffs */
+ {
+ return arf_sqr_via_mpfr(res, x, prec, ARF_RND_DOWN);
+ }
+ else
+ {
+ mp_size_t alloc;
+ mp_srcptr xptr;
+ mp_ptr tmp;
+ ARF_MUL_TMP_DECL
+
+ ARF_GET_MPN_READONLY(xptr, xn, x);
+
+ alloc = resn = 2 * xn;
+ ARF_MUL_TMP_ALLOC(tmp, alloc)
+
+ mpn_sqr(tmp, xptr, xn);
+
+ ret = _arf_set_round_mpn(res, &expfix, tmp, resn, 0, prec, ARF_RND_DOWN);
+ _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), expfix);
+ ARF_MUL_TMP_FREE(tmp, alloc)
+
+ return ret;
+ }
+}
diff --git a/arb/pow_fmpz.c b/arf/sqr_special.c
similarity index 51%
rename from arb/pow_fmpz.c
rename to arf/sqr_special.c
index dc470b274..e6e1b154c 100644
--- a/arb/pow_fmpz.c
+++ b/arf/sqr_special.c
@@ -1,5 +1,5 @@
/*
- Copyright (C) 2012, 2013 Fredrik Johansson
+ Copyright (C) 2021 Albin Ahlbäck
This file is part of Arb.
@@ -9,19 +9,15 @@
(at your option) any later version. See .
*/
-#include "arb.h"
+#include "arf.h"
void
-arb_pow_fmpz(arb_t y, const arb_t b, const fmpz_t e, slong prec)
+arf_sqr_special(arf_t res, const arf_t x)
{
- arb_pow_fmpz_binexp(y, b, e, prec);
-}
-
-void
-arb_pow_ui(arb_t y, const arb_t b, ulong e, slong prec)
-{
- fmpz_t f;
- fmpz_init_set_ui(f, e);
- arb_pow_fmpz(y, b, f, prec);
- fmpz_clear(f);
+ if (arf_is_zero(x))
+ arf_zero(res);
+ else if (arf_is_nan(x))
+ arf_nan(res);
+ else
+ arf_pos_inf(res);
}
diff --git a/arf/sqr_via_mpfr.c b/arf/sqr_via_mpfr.c
new file mode 100644
index 000000000..4e3d6697a
--- /dev/null
+++ b/arf/sqr_via_mpfr.c
@@ -0,0 +1,63 @@
+/*
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arf.h"
+
+int
+arf_sqr_via_mpfr(arf_ptr res, arf_srcptr x, slong prec, arf_rnd_t rnd)
+{
+ mp_size_t xn, resn, val;
+ mp_srcptr xptr;
+ mp_ptr tmp, resptr;
+ mpfr_t xf, resf;
+ int ret;
+ ARF_MUL_TMP_DECL
+
+ if (arf_is_special(x))
+ {
+ arf_sqr_special(res, x);
+ return 0;
+ }
+
+ ARF_GET_MPN_READONLY(xptr, xn, x);
+
+ prec = FLINT_MIN(xn * 2 * FLINT_BITS, prec);
+ resn = (prec + FLINT_BITS - 1) / FLINT_BITS;
+
+ ARF_MUL_TMP_ALLOC(tmp, resn)
+
+ resf->_mpfr_d = tmp;
+ resf->_mpfr_prec = prec;
+ resf->_mpfr_sign = 1;
+ resf->_mpfr_exp = 0;
+
+ xf->_mpfr_d = (mp_ptr) xptr;
+ xf->_mpfr_prec = xn * FLINT_BITS;
+ xf->_mpfr_sign = 1; /* Sign does not matter since we are squaring */
+ xf->_mpfr_exp = 0;
+
+ ret = mpfr_sqr(resf, xf, arf_rnd_to_mpfr(rnd));
+
+ ret = (ret != 0);
+
+ _fmpz_2times_fast(ARF_EXPREF(res), ARF_EXPREF(x), resf->_mpfr_exp);
+
+ val = 0;
+ while (tmp[val] == 0)
+ val++;
+
+ ARF_GET_MPN_WRITE(resptr, resn - val, res);
+ flint_mpn_copyi(resptr, tmp + val, resn - val);
+
+ ARF_MUL_TMP_FREE(tmp, resn)
+
+ return ret;
+}
diff --git a/arf/test/t-nn_sqr_2.c b/arf/test/t-nn_sqr_2.c
new file mode 100644
index 000000000..e0654eac8
--- /dev/null
+++ b/arf/test/t-nn_sqr_2.c
@@ -0,0 +1,45 @@
+/*
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arf.h"
+
+int main()
+{
+ slong ix, result;
+ flint_rand_t state;
+
+ flint_printf("nn_sqr_2....");
+ fflush(stdout);
+
+ flint_randinit(state);
+
+ for (ix = 0; ix < 10000 * arb_test_multiplier(); ix++)
+ {
+ ulong r0, r1, r2, r3, s0, s1, s2, s3, a0, a1;
+
+ a0 = n_randtest(state);
+ a1 = n_randtest(state);
+
+ nn_mul_2x2(r3, r2, r1, r0, a1, a0, a1, a0);
+ nn_sqr_2(s3, s2, s1, s0, a1, a0);
+
+ result = (s3 == r3 && s2 == r2 && s1 == r1 && s0 == r0);
+ if (!result)
+ {
+ flint_printf("FAIL!\n");
+ flint_abort();
+ }
+ }
+
+ flint_randclear(state);
+ flint_printf("PASS\n");
+ return EXIT_SUCCESS;
+}
diff --git a/arf/test/t-sqr_rnd_down.c b/arf/test/t-sqr_rnd_down.c
new file mode 100644
index 000000000..f62aca379
--- /dev/null
+++ b/arf/test/t-sqr_rnd_down.c
@@ -0,0 +1,65 @@
+/*
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arf.h"
+
+int main()
+{
+ slong iter, iter2;
+ flint_rand_t state;
+
+ flint_printf("sqr_rnd_down....");
+ fflush(stdout);
+
+ flint_randinit(state);
+
+ for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
+ {
+ arf_t x, z, v;
+ slong prec, r1, r2;
+ arf_rnd_t rnd;
+
+ arf_init(x);
+ arf_init(z);
+ arf_init(v);
+
+ for (iter2 = 0; iter2 < 100; iter2++)
+ {
+ arf_randtest_special(x, state, 2000, 100);
+ prec = 2 + n_randint(state, 2000);
+
+ if (n_randint(state, 50) == 0)
+ prec = ARF_PREC_EXACT;
+
+ r1 = arf_mul_rnd_down(z, x, x, prec);
+ r2 = arf_sqr_rnd_down(v, x, prec);
+ if (!arf_equal(z, v) || r1 != r2)
+ {
+ flint_printf("FAIL!\n");
+ flint_printf("prec = %wd, rnd = %d\n\n", prec, rnd);
+ flint_printf("x = "); arf_print(x); flint_printf("\n\n");
+ flint_printf("z = "); arf_print(z); flint_printf("\n\n");
+ flint_printf("v = "); arf_print(v); flint_printf("\n\n");
+ flint_printf("r1 = %wd, r2 = %wd\n", r1, r2);
+ flint_abort();
+ }
+ }
+
+ arf_clear(x);
+ arf_clear(z);
+ arf_clear(v);
+ }
+
+ flint_randclear(state);
+ flint_cleanup();
+ flint_printf("PASS\n");
+ return EXIT_SUCCESS;
+}
diff --git a/arf/test/t-sqr_via_mpfr.c b/arf/test/t-sqr_via_mpfr.c
new file mode 100644
index 000000000..94b924b42
--- /dev/null
+++ b/arf/test/t-sqr_via_mpfr.c
@@ -0,0 +1,94 @@
+/*
+ Copyright (C) 2021 Albin Ahlbäck
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "arf.h"
+
+int main()
+{
+ slong iter, iter2;
+ flint_rand_t state;
+
+ flint_printf("sqr_via_mpfr....");
+ fflush(stdout);
+
+ flint_randinit(state);
+
+ for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
+ {
+ arf_t x, z, v;
+ slong prec, r1, r2;
+ arf_rnd_t rnd;
+
+ arf_init(x);
+ arf_init(z);
+ arf_init(v);
+
+ for (iter2 = 0; iter2 < 100; iter2++)
+ {
+ arf_randtest_special(x, state, 2000, 100);
+ prec = 2 + n_randint(state, 2000);
+
+ if (n_randint(state, 50) == 0)
+ prec = ARF_PREC_EXACT;
+
+ switch (n_randint(state, 5))
+ {
+ case 0: rnd = ARF_RND_DOWN; break;
+ case 1: rnd = ARF_RND_UP; break;
+ case 2: rnd = ARF_RND_FLOOR; break;
+ case 3: rnd = ARF_RND_CEIL; break;
+ default: rnd = ARF_RND_NEAR; break;
+ }
+
+ switch (n_randint(state, 2))
+ {
+ case 0:
+ r1 = arf_mul_via_mpfr(z, x, x, prec, rnd);
+ r2 = arf_sqr_via_mpfr(v, x, prec, rnd);
+ if (!arf_equal(z, v) || r1 != r2)
+ {
+ flint_printf("FAIL!\n");
+ flint_printf("prec = %wd, rnd = %d\n\n", prec, rnd);
+ flint_printf("x = "); arf_print(x); flint_printf("\n\n");
+ flint_printf("z = "); arf_print(z); flint_printf("\n\n");
+ flint_printf("v = "); arf_print(v); flint_printf("\n\n");
+ flint_printf("r1 = %wd, r2 = %wd\n", r1, r2);
+ flint_abort();
+ }
+ break;
+
+ default:
+ r1 = arf_mul_via_mpfr(v, x, x, prec, rnd);
+ r2 = arf_sqr_via_mpfr(x, x, prec, rnd);
+ if (!arf_equal(v, x) || r1 != r2)
+ {
+ flint_printf("FAIL (aliasing)!\n");
+ flint_printf("prec = %wd, rnd = %d\n\n", prec, rnd);
+ flint_printf("x = "); arf_print(x); flint_printf("\n\n");
+ flint_printf("z = "); arf_print(z); flint_printf("\n\n");
+ flint_printf("v = "); arf_print(v); flint_printf("\n\n");
+ flint_printf("r1 = %wd, r2 = %wd\n", r1, r2);
+ flint_abort();
+ }
+ break;
+ }
+ }
+
+ arf_clear(x);
+ arf_clear(z);
+ arf_clear(v);
+ }
+
+ flint_randclear(state);
+ flint_cleanup();
+ flint_printf("PASS\n");
+ return EXIT_SUCCESS;
+}
diff --git a/doc/source/arb.rst b/doc/source/arb.rst
index 70837041e..30ca849f7 100644
--- a/doc/source/arb.rst
+++ b/doc/source/arb.rst
@@ -989,28 +989,47 @@ Powers and roots
Alias for :func:`arb_root_ui`, provided for backwards compatibility.
-.. function:: void arb_sqr(arb_t y, const arb_t x, slong prec)
+.. function:: void arb_sqr(arb_t res, const arb_t x, slong prec)
- Sets *y* to be the square of *x*.
+ Sets *res* to the square of `x`.
+
+.. function:: void _arb_pow_mpz_binexp(arb_t y, const arb_t b, mpz_srcptr e, slong prec)
+
+.. function:: void arb_pow_ui_binexp(arb_t y, const arb_t b, ulong e, slong prec)
+
+ Sets `y = b^e` via binary exponentiation with an initial division if
+ `e < 0`. Provided that `b` and `e` are small enough and the exponent is
+ positive, the exact power can be computed by setting the precision to
+ *ARF_PREC_EXACT*.
+
+ Note that these functions can get slow if the exponent is
+ extremely large (in such cases :func:`arb_pow` may be superior).
+
+.. function:: void arb_pow_si(arb_t y, const arb_t b, slong e, slong prec);
+
+ Sets `y = b^e` by wrapping *arb_pow_ui*, doing an initial division if
+ `e < 0`.
.. function:: void arb_pow_fmpz_binexp(arb_t y, const arb_t b, const fmpz_t e, slong prec)
+ Sets `y = b^e` by wrapping *arb_pow_si* and *_arb_pow_mpz_binexp*.
+
.. function:: void arb_pow_fmpz(arb_t y, const arb_t b, const fmpz_t e, slong prec)
+ Wrapper for *arb_pow_fmpz_binexp*.
+
.. function:: void arb_pow_ui(arb_t y, const arb_t b, ulong e, slong prec)
+ Wrapper for *arb_pow_ui_binexp*.
+
.. function:: void arb_ui_pow_ui(arb_t y, ulong b, ulong e, slong prec)
.. function:: void arb_si_pow_ui(arb_t y, slong b, ulong e, slong prec)
- Sets `y = b^e` using binary exponentiation (with an initial division
- if `e < 0`). Provided that *b* and *e*
+ Sets `y = b^e` using binary exponentiation. Provided that *b* and *e*
are small enough and the exponent is positive, the exact power can be
computed by setting the precision to *ARF_PREC_EXACT*.
- Note that these functions can get slow if the exponent is
- extremely large (in such cases :func:`arb_pow` may be superior).
-
.. function:: void arb_pow_fmpq(arb_t y, const arb_t x, const fmpq_t a, slong prec)
Sets `y = b^e`, computed as `y = (b^{1/q})^p` if the denominator of
diff --git a/doc/source/arf.rst b/doc/source/arf.rst
index 0d8ba4fd3..3d4e4b9ae 100644
--- a/doc/source/arf.rst
+++ b/doc/source/arf.rst
@@ -111,6 +111,18 @@ Types, macros and constants
test code. If in doubt, simply try with some convenient high precision
instead of using this special value, and check that the result is exact.
+.. macro:: nn_mul_2x1(r2, r1, r0, a1, a0, b0)
+
+ Sets `(r_2, r_1, r_0)` to `(a_1, a_0)` multiplied by `b_0`.
+
+.. macro:: nn_mul_2x2(r3, r2, r1, r0, a1, a0, b1, b0)
+
+ Sets `(r_2, r_1, r_0)` to `(a_1, a_0)` multiplied by `(b_1, b_0)`.
+
+.. macro:: nn_sqr_2(r3, r2, r1, r0, a1, a0)
+
+ Sets `(r_2, r_1, r_0)` to the square of `(a_1, a_0)`.
+
Memory management
-------------------------------------------------------------------------------
@@ -553,7 +565,8 @@ Addition and multiplication
.. function:: int arf_neg_round(arf_t res, const arf_t x, slong prec, arf_rnd_t rnd)
- Sets *res* to `-x`.
+ Sets *res* to `-x`. Returns 0 if this operation was made exactly and 1 if
+ truncation occurred.
.. function:: int arf_add(arf_t res, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd)
@@ -563,11 +576,13 @@ Addition and multiplication
.. function:: int arf_add_fmpz(arf_t res, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd)
- Sets *res* to `x + y`.
+ Sets *res* to `x + y`. Returns 0 if this operation was made exactly and 1
+ if truncation occurred.
.. function:: int arf_add_fmpz_2exp(arf_t res, const arf_t x, const fmpz_t y, const fmpz_t e, slong prec, arf_rnd_t rnd)
- Sets *res* to `x + y 2^e`.
+ Sets *res* to `x + y 2^e`. Returns 0 if this operation was made exactly and
+ 1 if truncation occurred.
.. function:: int arf_sub(arf_t res, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd)
@@ -577,7 +592,8 @@ Addition and multiplication
.. function:: int arf_sub_fmpz(arf_t res, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd)
- Sets *res* to `x - y`.
+ Sets *res* to `x - y`. Returns 0 if this operation was made exactly and 1
+ if truncation occurred.
.. function:: void arf_mul_2exp_si(arf_t res, const arf_t x, slong e)
@@ -595,7 +611,31 @@ Addition and multiplication
.. function:: int arf_mul_fmpz(arf_t res, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd)
- Sets *res* to `x \cdot y`.
+ Sets *res* to `x \cdot y`. Returns 0 if this operation was made exactly and
+ 1 if truncation occurred.
+
+.. function:: void arf_mul_special(arf_t z, const arf_t x, const arf_t y)
+
+ Sets *res* to the special value of `x \cdot y`.
+
+ Note that it is assumed that at least one of `x` and `y` has special
+ values.
+
+.. function:: int arf_sqr_via_mpfr(arf_ptr res, arf_srcptr x, slong prec, arf_rnd_t rnd)
+
+ Sets *res* to `x^2` by calling MPFR. Returns 0 if this operation was made
+ exactly and 1 if truncation occurred.
+
+.. function:: int arf_sqr_rnd_down(arf_ptr res, arf_srcptr x, slong prec)
+
+ Sets *res* to `x^2`. If the opertation was made exactly, it returns 1.
+ Otherwise it rounds downwards and returns 1.
+
+.. function:: void arf_sqr_special(arf_t res, const arf_t x)
+
+ Sets *res* to the special value of `x^2`.
+
+ Note that it is assumed that `x` has a special value.
.. function:: int arf_addmul(arf_t z, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd)
@@ -607,7 +647,8 @@ Addition and multiplication
.. function:: int arf_addmul_fmpz(arf_t z, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd)
- Performs a fused multiply-add `z = z + x \cdot y`, updating *z* in-place.
+ Adds `x \cdot y` to `z`, updating `z` in-place. Returns 0 if this operation
+ was made exactly and 1 if truncation occurred.
.. function:: int arf_submul(arf_t z, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd)
@@ -619,16 +660,21 @@ Addition and multiplication
.. function:: int arf_submul_fmpz(arf_t z, const arf_t x, const fmpz_t y, slong prec, arf_rnd_t rnd)
- Performs a fused multiply-subtract `z = z - x \cdot y`, updating *z* in-place.
+ Performs a fused multiply-subtract `z = z - x \cdot y`, updating *z*
+ in-place. Returns 0 if this operation was made exactly and 1 if truncation
+ occurred.
.. function:: int arf_fma(arf_t res, const arf_t x, const arf_t y, const arf_t z, slong prec, arf_rnd_t rnd)
Sets *res* to `x \cdot y + z`. This is equivalent to an *addmul* except
- that *res* and *z* can be separate variables.
+ that *res* and *z* can be separate variables. Returns 0 if this operation
+ was made exactly and 1 if truncation occurred.
.. function:: int arf_sosq(arf_t res, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd)
- Sets *res* to `x^2 + y^2`, rounded to *prec* bits in the direction specified by *rnd*.
+ Sets *res* to `x^2 + y^2`, rounded to *prec* bits in the direction
+ specified by *rnd*. Returns 0 if this operation was made exactly and 1 if
+ truncation occurred.
Summation
-------------------------------------------------------------------------------
@@ -662,8 +708,9 @@ Division
.. function:: int arf_fmpz_div_fmpz(arf_t res, const fmpz_t x, const fmpz_t y, slong prec, arf_rnd_t rnd)
- Sets *res* to `x / y`, rounded to *prec* bits in the direction specified by *rnd*,
- returning nonzero iff the operation is inexact. The result is NaN if *y* is zero.
+ Sets *res* to `x / y`, rounded to *prec* bits in the direction specified by
+ *rnd*, returning nonzero iff the operation is inexact. The result is NaN if
+ *y* is zero.
Square roots
-------------------------------------------------------------------------------
diff --git a/mag.h b/mag.h
index 00eb44fa9..917798677 100644
--- a/mag.h
+++ b/mag.h
@@ -112,6 +112,23 @@ _fmpz_sub2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, slong c)
}
}
+static __inline__ void
+_fmpz_2times_fast(fmpz_t res, const fmpz_t x, slong c)
+{
+ slong cx = *x;
+
+ if (!COEFF_IS_MPZ(*res) && (cx > COEFF_MIN / 2 && cx < COEFF_MAX / 2))
+ {
+ *res = 2 * cx + c;
+ }
+ else
+ {
+ fmpz_mul_2exp(res, x, 1);
+ if (c != 0)
+ fmpz_add_si(res, res, c);
+ }
+}
+
#define MAG_EXP_POS_INF (COEFF_MIN+1)