-
Notifications
You must be signed in to change notification settings - Fork 0
/
primefac.py
1215 lines (1127 loc) · 50.9 KB
/
primefac.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/python3
from math import sqrt, log2, ceil, gcd, inf
from itertools import takewhile, compress, count
from multiprocessing import Process, Queue as mpQueue
from random import randrange#, seed; seed(0)
from time import time
from datetime import datetime, timedelta
version = "2.0.12"
def primegen(limit=inf):
"""
Generates primes < limit almost lazily by a segmented sieve of Eratosthenes.
Memory usage depends on the sequence of prime gaps. Unconditionally, we can
say that memory usage is O(p^0.7625), where p is the most-recently-yielded
prime. On the Riemann Hypothesis and Cramer's conjecture, we can bring this
down to O(p^0.75 * log(p)) and O(sqrt(p) * log(p)^2), respectively.
Input: limit -- a number (default = inf)
Output: sequence of integers
Examples:
>>> list(islice(primegen(), 20))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71]
>>> list(primegen(73))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71]
"""
# We don't sieve 2, so we ought to be able to get sigificant savings by halving the length of the sieve.
# But the tiny extra computation involved in that seems to exceed the savings.
yield from takewhile(lambda x: x < limit, (2,3,5,7,11,13,17,19,23,29,31,37,41,43,47))
pl, pg = [3,5,7], primegen()
for p in pl: next(pg)
n = next(pg); nn = n*n
while True:
n = next(pg)
ll, nn = nn, n*n
sl = (nn - ll)
sieve = bytearray([True]) * sl
for p in pl:
k = (-ll) % p
sieve[k::p] = bytearray([False]) * ((sl-k)//p + 1)
if nn > limit: break # TODO bring this condition up to the while statement
yield from compress(range(ll,ll+sl,2), sieve[::2])
pl.append(n)
yield from takewhile(lambda x: x < limit, compress(range(ll,ll+sl,2), sieve[::2]))
def ilog(x, b): # TODO: investigate optimization starting from x.bin_length() * 2 // b
"""
Greatest integer l such that b**l <= x
Input: x, b -- integers
Output: An integer
Examples:
>>> ilog(263789, 10)
5
>>> ilog(1023, 2)
9
"""
l = 0
while x >= b:
x //= b
l += 1
return l
# TODO possible optimization route: x.bit_length() == ilog(x, 2) + 1; we can therefore use x.bit_length() * 2 // b as a
# 1st approximation to ilog(x, b), then compute pow(b, x.bit_length() * 2 // b), then compare that to x and adjust.
try: from math import isqrt
except ImportError:
def isqrt(n):
"""
Greatest integer less than or equal to the square root of n.
Shamelessly stolen from https://codegolf.stackexchange.com/a/9088.
Input: n -- a whole number
Output: An integer
Examples:
>>> list(map(isqrt, range(25)))
[0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4]
"""
if n < 0: return int(n)
c = n*4//3
d = c.bit_length()
a = d>>1
if d&1:
x = 1 << a
y = (x + (n >> a)) >> 1
else:
x = (3 << a) >> 2
y = (x + (c >> a)) >> 1
if x != y:
x, y = y, (y + n//y) >> 1
while y < x: x, y = y, (y + n//y) >> 1
return x
try: from math import prod as iterprod
except ImportError:
def iterprod(l):
"""
Product of the elements of any iterable. Product of empty iterable == 1.
Input: l -- iterable
Output: A number
Examples:
>>> iterprod(range(1, 8))
5040
"""
z = 1
for x in l: z *= x
return z
try:
pow(2, -1, 3)
def modinv(a, m):
"""
Returns the inverse of a modulo m, normalized to lie between 0 and m-1. If
a is not coprime to m, return None.
Input:
a -- an integer coprime to m
n -- a positive integer
Output: None or an integer x between 0 and m-1 such that (a * x) % m == 1
Examples:
>>> [modinv(1,1), modinv(2,5), modinv(5,8), modinv(37,100), modinv(12,30)]
[0, 3, 5, 73, None]
"""
return pow(a, -1, m)
except ValueError:
def modinv(a, m):
"""
Returns the inverse of a modulo m, normalized to lie between 0 and m-1. If
a is not coprime to m, return None.
Input:
a -- an integer coprime to m
n -- a positive integer
Output: None or an integer x between 0 and m-1 such that (a * x) % m == 1
Examples:
>>> [modinv(1,1), modinv(2,5), modinv(5,8), modinv(37,100), modinv(12,30)]
[0, 3, 5, 73, None]
"""
if m <= 0: return None
M, x, r = m, 1, 0
while m != 0:
q = a//m
a, m, r, x = m, a%m, x-q*r, r
return x % M if a == 1 else None
def introot(n, r=2): # TODO Newton iteration?
"""
For returns the rth root of n, rounded to the nearest integer in the
direction of zero. Returns None if r is even and n is negative.
Input:
n -- an integer
r -- a natural number or None
Output: An integer
Examples:
>>> [introot(-729, 3), introot(-728, 3), introot(1023, 2), introot(1024, 2)]
[-9, -8, 31, 32]
"""
if n < 0: return None if r%2 == 0 else -introot(-n, r)
if n < 2: return n
if r == 1: return n
if r == 2: return isqrt(n)
#if r % 2 == 0: return introot(isqrt(n), r//2) # TODO Check validity of this line.
lower = upper = 1 << (n.bit_length() // r)
while lower ** r > n: lower >>= 2
while upper ** r <= n: upper <<= 2
while lower != upper - 1:
mid = (lower + upper) // 2
m = mid**r
if m == n: return mid
elif m < n: lower = mid
elif m > n: upper = mid
return lower
def ispower(n, r=0):
"""
If r == 0:
If n is a perfect power, return a tuple containing largest integer (in
terms of magnitude) that, when squared/cubed/etc, yields n as the first
component and the relevant power as the second component.
If n is not a perfect power, return None.
If r > 0:
We check whether n is a perfect rth power; we return its rth root if it
is and None if it isn't.
Input:
n -- an integer
r -- an integer
Output: An integer, a 2-tuple of integers, or None
Examples:
>>> [ispower(n) for n in [64, 25, -729, 1729]]
[(8, 2), (5, 2), (-9, 3), None]
>>> [ispower(64, r) for r in range(7)]
[(8, 2), 64, 8, 4, None, None, 2]
"""
#if r == 0: return any(ispower(n, r) for r in primegen(n.bit_length()+1))
#return n == introot(n, r) ** r
if r == 0:
if n in (0, 1, -1): return (n, 1)
for r in primegen(n.bit_length()+1):
x = ispower(n, r)
if x is not None: return (x, r)
return None
# TODO tricks for special cases
if (r == 2) and (n & 2): return None
if (r == 3) and (n & 7) in (2,4,6): return None
x = introot(n, r)
return None if x is None else (x if x**r == n else None)
def jacobi(a, n):
"""
The Jacobi symbol (a|n).
Input:
a -- any integer
n -- odd integer
Output: -1, 0, or 1
Examples:
>>> [jacobi(a, 15) for a in [-10, -7, -4, -2, -1, 0, 1, 2, 4, 7, 10]]
[0, 1, -1, -1, -1, 0, 1, 1, 1, -1, 0]
>>> [jacobi(a, 13) for a in [-10, -9, -4, -2, -1, 0, 1, 2, 4, 9, 10]]
[1, 1, 1, -1, 1, 0, 1, -1, 1, 1, 1]
>>> [jacobi(a, 11) for a in [-10, -9, -4, -2, -1, 0, 1, 2, 4, 9, 10]]
[1, -1, -1, 1, -1, 0, 1, -1, 1, 1, -1]
"""
if (n%2 == 0) or (n < 0): return None # n must be a positive odd number TODO delete this check?
if (a == 0) or (a == 1): return a
a, t = a%n, 1
while a != 0:
while not a & 1:
a //= 2
if n & 7 in (3, 5): t *= -1
a, n = n, a
if (a & 3 == 3) and (n & 3) == 3: t *= -1
a %= n
return t if n == 1 else 0
def isprime(n, tb=(3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59)): # TODO optimize the basis, possibly by varying it with n
"""
BPSW primality test variant: we use the strong Lucas PRP test and preface
the computation with trial division for speed. No composites are known to
pass the test, though it is suspected that infinitely many will do so.
There are definitely no such errors below 2^64.
This function is mainly a streamlined version of bpsw().
Input:
n -- integer. Number to be examined.
tb -- iterable of primes. Basis for trial division.
Output: True if probably prime; False if definitely composite.
Examples:
>>> [n for n in range(91) if isprime(1000*n+1)]
[3, 4, 7, 9, 13, 16, 19, 21, 24, 28, 51, 54, 55, 61, 69, 70, 76, 81, 88, 90]
>>> [isprime(rpn("38 ! 1 +")), isprime(rpn("38 ! 1 -"))]
[False, True]
"""
# 1. Do some trial division with tb as the basis.
if n % 2 == 0 or n < 3: return n == 2
for p in tb:
if n % p == 0: return n == p
# 2. If sprp(n,2) fails, return False. If it succeeds, continue.
t, s = (n - 1) // 2, 1
while t % 2 == 0: t //= 2; s += 1
#assert 1 + 2**s * t == n
x = pow(2, t, n)
if x != 1 and x != n - 1:
for j in range(1, s):
x = pow(x, 2, n)
if x == 1: return False
elif x == n - 1: break
else: return False
# 3. Select parameters for slprp.
for D in count(5, 4):
j = jacobi(D, n)
if j == 0: return D == n
if j == -1: break
D = -2 - D
j = jacobi(D, n)
if j == 0: return -D == n
if j == -1: break
if D == -13 and ispower(n,2): return False # If n is square, then this loop amounts to very slow trial division.
# Now run slprp(n, 1, (1 - D) // 4) and return the result.
b = (1 - D) // 4
if 1 < gcd(n, b) < n: return False
s, t = 1, (n + 1) // 2
while t % 2 == 0: s += 1; t //= 2
v, w, q, Q = 2, 1, 1, 1
for k in bin(t)[2:]:
q = (q*Q) % n
if k == '1': Q, v, w = (q*b) % n, (w*v - q) % n, (w*w - 2*q*b) % n
else: Q, w, v = q , (w*v - q) % n, (v*v - 2*q ) % n
# assert ( (2*w-v) * modinv(D,n) ) % n, v == lucasmod(t, 1, b, n)
if v == 0 or ( (2*w-v) * modinv(D,n) ) % n == 0: return True
q = pow(b, t, n)
for _ in range(1, s):
v = (v*v - 2*q) % n
if v == 0: return True
q = (q*q) % n
return False
def pollardrho_brent(n, verbose=False):
"""
Factors integers using Brent's variation of Pollard's rho algorithm.
If n is prime, we immediately return n; if not, we keep chugging until a
nontrivial factor is found. This function calls the randomizer; two
successive calls may therefore return two different results.
Input: n -- number to factor
Output: A factor of n.
Examples:
>>> n = rpn("20 ! 1 +"); f = pollardrho_brent(n); n % f
0
"""
if isprime(n): return n
g = n
while g == n:
y, c, m, g, r, q = randrange(1, n), randrange(1, n), randrange(1, n), 1, 1, 1
while g==1:
x, k = y, 0
for i in range(r): y = (y**2 + c) % n
while k < r and g == 1:
ys = y
for i in range(min(m, r-k)):
y = (y**2 + c) % n
q = q * abs(x-y) % n
g, k = gcd(q, n), k+m
r *= 2
if g==n:
while True:
ys = (ys**2+c)%n
g = gcd(x-ys, n)
if g > 1: break
return g
def pollard_pm1(n, B1=100, B2=1000, verbose=False): # TODO: What are the best default bounds and way to increment them?
"""
Integer factoring function. Uses Pollard's p-1 algorithm. Note that this
is only efficient if the number to be factored has a prime factor p such
that p-1's largest prime factor is "small". In this implementation, that
tends to mean less than 10,000,000 or so.
Input:
n -- number to factor
B1 -- Natural number. Bound for phase 1. Default == 100.
B2 -- Natural number > B1. Bound for phase 2. Default == 1000.
Output: A factor of n.
Examples:
>>> pollard_pm1(rpn("28 ! 1 - 239 //"))
1224040923709997
"""
if isprime(n): return n
m = ispower(n)
if m: return m[0]
while True:
pg = primegen()
q = 2 # TODO: what about other initial values of q?
p = next(pg)
while p <= B1: q, p = pow(q, p**ilog(B1, p), n), next(pg)
g = gcd(q-1, n)
if 1 < g < n: return g
while p <= B2: q, p = pow(q, p, n), next(pg)
g = gcd(q-1, n)
if 1 < g < n: return g
# These bounds failed. Increase and try again.
B1 *= 10
B2 *= 10
def mlucas(v, a, n):
# Helper for williams_pp1(). Multiplies along a Lucas sequence mod n.
v1, v2 = v, (v**2 - 2) % n
for bit in bin(a)[3:]: v1, v2 = ((v1**2 - 2) % n, (v1*v2 - v) % n) if bit == "0" else ((v1*v2 - v) % n, (v2**2 - 2) % n)
return v1
def williams_pp1(n, verbose=False): # TODO: experiment with different values of v0, and implement the two-phase version
"""
Integer factoring function. Uses Williams' p+1 algorithm, single-stage
variant. Note that this is only efficient when the number to be factored
has a prime factor p such that p+1's largest prime factor is "small".
Input: n -- integer to factor
Output: Integer. A nontrivial factor of n.
Example:
>>> williams_pp1(315951348188966255352482641444979927)
12403590655726899403
"""
if isprime(n): return n
m = ispower(n)
if m: return m[0]
for v in count(3):
for p in primegen():
e = ilog(isqrt(n), p)
if e == 0: break
for _ in range(e): v = mlucas(v, p, n)
g = gcd(v - 2, n)
if 1 < g < n: return g
if g == n: break
def ecadd(p1, p2, p0, n):
# Helper for ecm(). Adds two points on a Montgomery curve mod n.
x1,z1 = p1; x2,z2 = p2; x0,z0 = p0
t1, t2 = (x1-z1)*(x2+z2), (x1+z1)*(x2-z2)
return (z0*pow(t1+t2,2,n) % n, x0*pow(t1-t2,2,n) % n)
def ecdub(p, A, n):
# Helper for ecm(). Doubles a point on a Montgomery curve mod n.
x, z = p; An, Ad = A
t1, t2 = pow(x+z,2,n), pow(x-z,2,n)
t = t1 - t2
return (t1*t2*4*Ad % n, (4*Ad*t2 + t*An)*t % n)
def ecmul(m, p, A, n):
# Helper for ecm(). Multiplies a point on a Montgomery curve mod n.
if m == 0: return (0, 0)
elif m == 1: return p
else:
q = ecdub(p, A, n)
if m == 2: return q
b = 1
while b < m: b *= 2
b //= 4
r = p
while b:
if m&b: q, r = ecdub(q, A, n), ecadd(q, r, p, n)
else: q, r = ecadd(r, q, p, n), ecdub(r, A, n)
b //= 2
return r
def secm(n, B1, B2, seed):
"""
Seeded ECM. Helper function for ecm(). Returns a possibly-trivial divisor
of n given two bounds and a seed. Uses the two-phase algorithm on
Montgomery curves. See https://wp.me/prTJ7-zI and https://wp.me/prTJ7-A7
for more details. Most of the code for this function's "helpers" were
shamelessly copied from the first of those links.
Input:
n -- Integer to factor
B1 -- Integer. Number of iterations for the first phase.
B2 -- Integer. Number of iterations for the second phase.
seed -- Integer. Selects the specific curve we'll be working on.
Output: Integer. A possibly-trivial factor of n.
Examples:
>>> secm(rpn('24 ! 1 -'), 100, 1000, 22)
991459181683
"""
u, v = (seed**2 - 5) % n, 4*seed % n
p = pow(u, 3, n)
Q, C = (pow(v-u,3,n)*(3*u+v) % n, 4*p*v % n), (p, pow(v,3,n))
pg = primegen()
p = next(pg)
while p <= B1: Q, p = ecmul(p**ilog(B1, p), Q, C, n), next(pg)
g = gcd(Q[1], n)
if 1 < g < n: return g
while p <= B2:
# There is a trick that can speed up the second stage. Instead of multiplying each prime by Q, we iterate over i from
# B1 + 1 to B2, adding 2Q at each step; when i is prime, the current Q can be accumulated into the running solution.
# Again, we defer the calculation of the GCD until the end of the iteration. TODO: Implement and compare performance.
Q = ecmul(p, Q, C, n)
g *= Q[1]
g %= n
p = next(pg)
return gcd(g, n)
def ecmparams(n): # TODO: Better parameters.
counter = 0
for i in count():
for _ in range(2*i+1):
yield (2**i, 10 * 2**i, randrange(6,n), counter)
counter += 1
for j in range(i+1):
yield (2**j, 10 * 2**j, randrange(6,n), counter)
counter += 1
def ecm(n, paramseq=ecmparams, nprocs=1, verbose=False):
"""
"Modern" integer factoring via elliptic curves. Uses Montgomery curves, the
two-phase algorithm, and (optionally) multiple processes. The hard work is
done by secm(); this function just does the managerial work of pulling a
sequence of parameters out of a generator and feeding them into secm().
Input:
n -- number to factor
paramseq -- sequence of parameters to feed into secm(). It must be an
infinite generator of 4-tuples (a,b,c,d), where a is the
number of iterations for the first phase, b is the number of
iterations for the second phase, c is a seed to select the
curve to work on, and d is an auxiliary used to count the
parameters generated so far. We need a < b and 6 <= c < n.
nprocs -- number of processes to use. Default == 1. Setting this > 1
is discouraged on "small" inputs because managing multiple
processes incurs significant overhead.
Output:
A factor of n.
Note that if the parameter sequence calls the randomizer (which is
currently the default behavior), then two successive calls may therefore
return two different results.
Examples:
>>> n = 625793187653 * 991459181683 # = 620448401733239439359999 = 24! - 1
>>> f = ecm(n)
>>> (n//f) * f
620448401733239439359999
"""
g = n % 6
if g % 2 == 0: return 2
if g % 3 == 0: return 3
if isprime(n): return n
m = ispower(n)
if m: return m[0]
if nprocs == 1:
for (B1,B2,seed,i) in paramseq(n):
f = secm(n, B1, B2, seed)
if 1 < f < n: return f
assert nprocs != 1
def factory(params, output): output.put(secm(*params))
ps, facs, procs = paramseq(n), mpQueue(), []
procs = [Process(target=factory, args=((n,)+next(ps)[:3], facs)) for _ in range(nprocs)]
for p in procs: p.start()
while True:
g = facs.get()
if 1 < g < n:
for p in procs: p.terminate()
return g
for p in range(nprocs): # TODO: Try doing this with a process pool.
if not procs[p].is_alive():
del procs[p]
break
procs.append(Process(target=factory, args=((n,)+next(ps)[:3], facs)))
procs[-1].start()
def sqrtmod_prime(a, p):
"""
Solves x**2 == a (mod p) for x. We assume that p is a prime and a is a
quadratic residue modulo p. If either of these assumptions is false, the
return value is meaningless.
The Cipolla-Lehmer section is my own. The rest appears to be derived from
https://codegolf.stackexchange.com/a/9088.
Input:
a -- natural number
p -- prime number
Output: whole number less than p
Examples:
>>> sqrtmod_prime(4, 5)
3
>>> sqrtmod_prime(13, 23)
6
>>> sqrtmod_prime(997, 7304723089)
761044645
"""
a %= p
if p%4 == 3: return pow(a, (p+1) >> 2, p)
elif p%8 == 5:
v = pow(a << 1, (p-5) >> 3, p)
return (a*v*(((a*v*v<<1)%p)-1))%p
elif p%8 == 1:
# CranPom ex 2.31, pg 112 / 126. Pretty sure this amounts to Cipolla-Lehmer.
if a == 0: return 0 # Necessary to avoid an infinite loop in the legendre section
h = 2
# while legendre(h*h - 4*a, p) != -1:
while pow(h*h - 4*a, (p-1) >> 1, p) != p - 1: h += 1 # TODO compare speed vs random selection
#return ( lucasmod((p+1)//2, h, a, p)[1] * modinv(2, p) ) % p
k, v, w, q, Q = (p+1)//2, 2, h % p, 1, 1
for kj in bin(k)[2:]:
q = (q*Q) % p
if kj == '1': Q, v, w = (q*a) % p, (w*v - h*q) % p, (w*w - 2*q*a) % p
else: Q, w, v = q , (w*v - h*q) % p, (v*v - 2*q ) % p
return (v*k) % p
else: return a # p == 2
def siqs(n, verbose=False):
"""
Uses the Self-Initializing Quadratic Sieve to extract a factor of n.
We make some calls to randrange, so the output may change from call to call.
This is derived from https://github.com/skollmann/PyFactorise.
Input:
n -- number to factor
verbose -- if True, print progress reports. Default == False.
Output:
If n is prime, we return n.
If n is composte, we return a nontrivial factor of n.
If n is too small, we raise a ValueError.
Examples:
>>> siqs(factorial(24) - 1) in (625793187653, 991459181683)
True
"""
if (not isinstance(n, int)) or n < 0: raise ValueError("Number must be a positive integer.")
if n < 2**64:
if verbose: print("Number is too small for SIQS. Using Pollard Rho instead.")
return pollardrho_brent(n)
if verbose: print("Factorizing %d (%d digits)..." % (n, len(str(n))))
if isprime(n): return n
if verbose: print("Number is composite.")
if verbose: print("Checking whether it is a perfect power...")
perfect_power = ispower(n)
if perfect_power:
if verbose: print(n, "=", "%d^%d." % (perfect_power[0], perfect_power[1]))
return perfect_power[0]
if verbose: print("Not a perfect power.")
if verbose: print("Using SIQS on %d (%d digits)..." % (n, len(str(n))))
if verbose: starttime, sievetime, latime = time(), 0, 0
# Choose parameters nf (sieve of factor base) and m (for sieving in [-m,m].
# Using similar parameters as msieve-1.52
dig = len(str(n))
if dig <= 34: nf, m = 200, 65536
elif dig <= 36: nf, m = 300, 65536
elif dig <= 38: nf, m = 400, 65536
elif dig <= 40: nf, m = 500, 65536
elif dig <= 42: nf, m = 600, 65536
elif dig <= 44: nf, m = 700, 65536
elif dig <= 48: nf, m = 1000, 65536
elif dig <= 52: nf, m = 1200, 65536
elif dig <= 56: nf, m = 2000, 65536 * 3
elif dig <= 60: nf, m = 4000, 65536 * 3
elif dig <= 66: nf, m = 6000, 65536 * 3
elif dig <= 74: nf, m = 10000, 65536 * 3
elif dig <= 80: nf, m = 30000, 65536 * 3
elif dig <= 88: nf, m = 50000, 65536 * 3
elif dig <= 94: nf, m = 60000, 65536 * 9
else: nf, m = 100000, 65536 * 9
class FactorBasePrime:
"""A factor base prime for the SIQS"""
def __init__(self, p, tmem, lp):
self.p, self.soln1, self.soln2, self.tmem, self.lp, self.ainv = p, None, None, tmem, lp, None
# Compute and return nf factor base primes suitable for a SIQS on n.
factor_base = []
for p in primegen():
if pow(n, (p-1) >> 1, p) == 1: # if n is a quadratic residue mod p
t = sqrtmod_prime(n, p)
lp = round(log2(p)) # This gets rounded for the sake of speed.
factor_base.append(FactorBasePrime(p, t, lp))
if len(factor_base) >= nf: break
if verbose: print("Factor base size: %d primes.\nLargest prime in base: %d." % (nf, factor_base[-1].p))
npolys, i_poly, prev_cnt, relcount, smooth_relations, required_relations_ratio = 0, 0, 0, 0, [], 1.05
p_min_i, p_max_i = None, None
for (i,fb) in enumerate(factor_base):
if p_min_i is None and fb.p >= 400: # 400 is a tunable parameter.
p_min_i = i
if p_max_i is None and fb.p > 4000: # 4000 is a tunable parameter.
p_max_i = i - 1
break
# The following may happen if the factor base is small, make sure that we have enough primes.
if p_max_i is None: p_max_i = len(factor_base) - 1
if p_min_i is None or p_max_i - p_min_i < 20: p_min_i = min(p_min_i, 5) # TODO This line is problematic for some small n.
target = sqrt(2 * float(n)) / m
target1 = target / sqrt((factor_base[p_min_i].p + factor_base[p_max_i].p) / 2)
while True:
if verbose: temptime = time()
if verbose: print("*** Phase 1: Finding smooth relations ***")
required_relations = round(nf * required_relations_ratio)
if verbose: print("Target: %d relations" % required_relations)
enough_relations = False
while not enough_relations:
if i_poly == 0: # Compute the first of a set of polynomials
# find q such that the product of factor_base[q_i] is about sqrt(2n)/m; try a few sets to find a good one
best_q, best_a, best_ratio = None, None, None
for _ in range(30):
a, q = 1, []
while a < target1:
p_i = 0
while p_i == 0 or p_i in q: p_i = randrange(p_min_i, p_max_i+1)
a *= factor_base[p_i].p
q.append(p_i)
ratio = a / target
# ratio too small seems to be not good
if (best_ratio is None or (ratio >= 0.9 and ratio < best_ratio) or best_ratio < 0.9 and ratio > best_ratio):
best_q, best_a, best_ratio = q, a, ratio
a, q = best_a, best_q
s, B = len(q), []
for l in range(s):
fb_l = factor_base[q[l]]
q_l = fb_l.p
assert a % q_l == 0
gamma = (fb_l.tmem * modinv(a // q_l, q_l)) % q_l
if gamma > q_l // 2: gamma = q_l - gamma
B.append(a // q_l * gamma)
b = sum(B) % a
b_orig = b
if (2 * b > a): b = a - b
assert 0 < b and 2 * b <= a and (b * b - n) % a == 0
g1, g2, g3, ga, gb = b * b - n, 2 * a * b, a * a, a, b_orig
ha, hb = a, b
for fb in factor_base:
if a % fb.p != 0:
fb.ainv = modinv(a, fb.p)
fb.soln1 = (fb.ainv * (fb.tmem - b)) % fb.p
fb.soln2 = (fb.ainv * (-fb.tmem - b)) % fb.p
else: # Compute the (i+1)-th polynomial, given that g is the i-th polynomial.
#v = lowest_set_bit(i) + 1
#z = -1 if ceil(i / (2 ** v)) % 2 == 1 else 1
z = -1 if ceil(i_poly / (1 + (i_poly ^ (i_poly-1)))) % 2 == 1 else 1
#b = (g.b + 2 * z * B[v - 1]) % g.a
b = (gb + 2 * z * B[(i_poly & (-i_poly)).bit_length() - 1]) % ga
a = ga
b_orig = b
if (2 * b > a): b = a - b
assert (b * b - n) % a == 0
g1, g2, g3, ga, gb = b * b - n, 2 * a * b, a * a, a, b_orig
ha, hb = a, b
for fb in factor_base:
if a % fb.p != 0:
fb.soln1 = (fb.ainv * ( fb.tmem - b)) % fb.p
fb.soln2 = (fb.ainv * (-fb.tmem - b)) % fb.p
i_poly += 1
npolys += 1
if i_poly >= 2 ** (len(B) - 1): i_poly = 0
# BEGIN SIEVING. Most of our time is spent between here and the "END SIEVING" comment.
sieve_array = [0] * (2 * m + 1)
for fb in factor_base:
if fb.soln1 is None: continue
p, lp = fb.p, fb.lp
a_start_1 = fb.soln1 - ((m + fb.soln1) // p) * p
if p > 20:
for a in range(a_start_1 + m, 2 * m + 1, p): sieve_array[a] += lp
a_start_2 = fb.soln2 - ((m + fb.soln2) // p) * p
for a in range(a_start_2 + m, 2 * m + 1, p): sieve_array[a] += lp
# Perform the trial division step of the SIQS.
limit = round(log2(m * sqrt(float(n)))) - 25 # 25 is a tunable parameter. The rounding is for speed.
for (i,sa) in enumerate(sieve_array):
if sa >= limit:
x = i - m
gx = (g3 * x + g2) * x + g1
# Determine whether gx can be fully factorized into primes from the factor base.
# If so, store the indices of the factors from the factor base as divisors_idx. If not, set that to None.
a, divisors_idx = gx, []
for (fbi,fb) in enumerate(factor_base):
if a % fb.p == 0:
exp = 0
while a % fb.p == 0:
a //= fb.p
exp += 1
divisors_idx.append((fbi, exp))
if a == 1:
u = ha * x + hb
v = gx
assert (u * u) % n == v % n
smooth_relations.append((u, v, divisors_idx))
break
relcount = len(smooth_relations)
if relcount >= required_relations:
enough_relations = True
break
# END SIEVING. Most of our time is spent between here and the "BEGIN SIEVING" comment.
if verbose and relcount > 0 and (relcount >= required_relations or i_poly % 8 == 0 or relcount > prev_cnt):
frac = relcount / required_relations
t = time() - starttime # Time spent so far
ett = t / frac # Estimated total time
ettc = ett - t # Estimated time to completion
eta = datetime.isoformat(datetime.now() + timedelta(seconds=ettc), sep=' ', timespec='seconds')
print('\b'*256 + "Found %d rels (%.1f%%) on %d polys. ETA %ds (%s). " % (relcount, 100*frac, npolys, ettc, eta), end='', flush=True)
prev_cnt = relcount
if verbose:
sievetime += time() - temptime
print("\nSieving time: %f seconds." % sievetime)
temptime = time()
print("*** Phase 2: Linear Algebra ***")
print("Building matrix for linear algebra step...")
M = [0] * nf
mask = 1
for sr in smooth_relations:
for (j,exp) in sr[2]:
if exp % 2: M[j] += mask
mask <<= 1
if verbose: print("Finding perfect squares using matrix and factors from perfect squares...")
# Perform the linear algebra step of the SIQS: do fast Gaussian elimination to determine pairs of perfect squares mod n.
# Use the optimisations described in [Çetin K. Koç and Sarath N. Arachchige. 'A Fast Algorithm for Gaussian Elimination
# over GF(2) and its Implementation on the GAPP.' Journal of Parallel and Distributed Computing 13.1 (1991): 118-122].
if verbose: gaussstart = time()
row_is_marked = bytearray([False]) * relcount
pivots = [-1] * nf
for j in range(nf):
M_j = M[j]
i = (M_j & (-M_j)).bit_length() - 1 #i = -1 if M[j] == 0 else lowest_set_bit(M[j])
# i is now the row of the first nonzero entry in column j, or -1 if no such row exists.
if i > -1:
pivots[j] = i
row_is_marked[i] = True
for k in range(nf):
if (M[k] >> i) & 1 and k != j: # test M[i][k] == 1
M[k] ^= M_j # add column j to column k mod 2
if verbose and ((nf - j) % 100 == 0 or nf == j + 1):
frac = (j+1) / nf
t = time() - gaussstart # Time spent so far
ett = t / frac # Estimated total time
ettc = ett - t # Estimated time to completion
eta = datetime.isoformat(datetime.now() + timedelta(seconds=ettc), sep=' ', timespec='seconds')
print('\b'*256 + "Gaussian elimination: %d/%d (%.1f%%). ETA %ds (%s). " % (j+1, nf, 100*frac, ettc, eta), end='', flush=True)
if verbose: print("\nGaussian elimination time: %f seconds" % (time() - gaussstart))
attempts = 0
for i in range(relcount):
if not row_is_marked[i]:
square_indices = [i]
for j in range(nf):
if (M[j] >> i) & 1: # test M[i][j] == 1
square_indices.append(pivots[j])
# Given the solution encoded by square_indices, try to find a factor of n, and return it.
attempts += 1
if verbose: print('\b'*42 + "Attempt #%d" % attempts, end='', flush=True)
# Given on of the solutions returned by siqs_solve_matrix and the corresponding smooth relations,
# calculate the pair (sqrt1, sqrt2) such that sqrt1^2 = sqrt2^2 (mod n).
sqrt1, sqrt2 = 1, 1
for idx in square_indices:
sqrt1 *= smooth_relations[idx][0]
sqrt2 *= smooth_relations[idx][1]
sqrt2 = isqrt(sqrt2)
assert (sqrt1 * sqrt1) % n == (sqrt2 * sqrt2) % n
factor = gcd(sqrt1 - sqrt2, n)
if 1 != factor != n:
if verbose:
print(" succeeded.")
latime += time() - temptime
print("Linear algebra time: %f seconds." % latime)
totaltime = time() - starttime
print("Total time: %f seconds." % (totaltime))
return factor
if verbose:
print('\b'*42 + "All %d attempts failed." % attempts)
latime += time() - temptime
print("Linear algebra time: %f seconds." % latime)
print("Failed to find a solution. Finding more relations...")
required_relations_ratio += 0.05
def multifactor(n, methods=(pollardrho_brent, pollard_pm1, williams_pp1, ecm, siqs), verbose=False):
"""
Integer factoring function. Uses several methods in parallel. Waits for a
function to return, kills the rest, and reports. Note that two successive
calls may return different results depending on which method finishes first
and whether any methods call the randomizer.
Input:
n -- number to factor
methods -- list of functions to run.
Output: A factor of n.
Examples:
>>> methods = [pollardrho_brent, pollard_pm1, williams_pp1, ecm, siqs]
>>> n = rpn("24 ! 1 -"); f = multifactor(n, methods); n%f
0
"""
# Note that the multiprocing incurs relatively significant overhead. Only call this if n is proving difficult to factor.
def factory(method, n, verbose, output): output.put((method(n, verbose=verbose), str(method).split()[1]))
factors = mpQueue()
procs = [Process(target=factory, args=(m, n, verbose, factors)) for m in methods]
for p in procs: p.start()
(f, g) = factors.get()
for p in procs: p.terminate()
names = {"pollardrho_brent":"prb", "pollard_pm1":"p-1", "williams_pp1":"p+1"}
return (f, names.get(g, g))
def primefac(n, trial=1000, rho=42000, verbose=False, methods=(pollardrho_brent,)):
"""
Generates the prime factors of the input. Factors that appear x times are
yielded x times.
Input:
n -- the number to be factored
trial -- Trial division is performed up to this limit and no further.
We trial divide by 2 and 3 whether the user wants to or not.
Default == 1000.
rho -- How long to have Pollard's rho chug before declaring a cofactor
difficult. Default == 42,000 iterations. Floating-point inf is
an acceptable value.
methods -- Use these methods on difficult cofactors. If the tuple has
more than 1 element, we have multifactor handle it. Calling
multifactor has a high overhead, so when the tuple has a
single element, we call that function directly. The default
is (pollardrho_brent,). Each function f in methods must
accept a single number n as its argument. If n is prime,
f(n) must return n. If n is composite, f(n) must return a
number strictly between 1 and n that evenly divides n.
Giving up is not allowed.
Output: Prime factors of n
Examples:
>>> list(primefac(1729))
[7, 13, 19]
>>> list(sorted(primefac(rpn("24 ! 1 -"))))
[625793187653, 991459181683]
"""
# Obtains a complete factorization of n, yielding the prime factors as they are obtained.
# If the user explicitly specifies a splitting method, use that method. Otherwise,
# 1. Pull out small factors with trial division.
# 2. Do a few rounds of Pollard's Rho algorithm.
# 3. Launch multifactor on the remainder. Note that multifactor's multiprocessing incurs relatively significant overhead.
if verbose: print("\nFactoring %d (%d digits):" % (n, len(str(n))))
if n < 0:
if verbose: print("Trial division: -1")
yield -1; n *= -1
if n < 2: return
if isprime(n):
if verbose: print("Number is prime.")
yield n
return
if verbose: print("Number is composite.")
# Trial division
factors, nroot = [], isqrt(n)
for p in primegen(max(4,trial)+1): # Note that we check for 2 and 3 whether the user wants to or not.
if verbose: print('\b'*80 + "Trial division:", p, end='', flush=False)
if n%p == 0:
while n%p == 0:
if verbose: print('\b'*80 + "Trial division:", p)
yield p
n //= p
nroot = isqrt(n)
if p > nroot:
if n != 1:
if verbose:
laststr = "Trial division: " + str(p)
print('\b'*80 + str(n) + " " * len(laststr))
yield n
return
if isprime(n):
if verbose:
laststr = "Trial division: " + str(p)
print('\b'*80 + str(n) + " " * len(laststr))
yield n
return
if verbose: print("\nTrial division finished.")
# TODO: Fermat's method?
# Pollard's rho
factors, difficult = [n], []
while len(factors) != 0:
rhocount = 0
n = factors.pop()
if verbose: print("Beginning Pollard Rho on %d." % n)
try:
g = n
while g == n:
x, c, g = randrange(1, n), randrange(1, n), 1
y = x
while g==1:
if rhocount >= rho: raise Exception
rhocount += 1
x = (x**2 + c) % n
y = (y**2 + c) % n
y = (y**2 + c) % n
g = gcd(x-y, n)
# We now have a nontrivial factor g of n. If we took too long to get here, we're actually at the except statement.
f = n // g
if verbose: print("Pollard Rho split %d into %d and %d." % (n, g, f))
if isprime(g):
if verbose: print(g, "is prime.")
yield g
else:
if verbose: print(g, "is composite.")
factors.append(g)
if isprime(f):
if verbose: print(f, "is prime.")
yield f
else:
if verbose: print(f, "is composite.")
factors.append(f)
except Exception:
if verbose: print("Declaring", n, "difficult.")
difficult.append(n) # Factoring n took too long. We'll have multifactor chug on it.
# TODO: P-1 by itself?
# TODO: ECM by itself?
names = {"pollardrho_brent":"prb", "pollard_pm1":"p-1", "williams_pp1":"p+1", "ecm":"ecm", "siqs":"siqs"}
factors = difficult
if len(methods) == 1:
method = methods[0]
name = names[str(method).split()[1]]
while len(factors) != 0:
n = factors.pop()
if verbose: print("Beginning %s on %d." % (name, n))
f = method(n, verbose=verbose)
g = n // f
if verbose: print("%s split %d into %d and %d." % (name, n, f, g))
if isprime(f):
if verbose: print(f, "is prime.")
yield f
else:
if verbose: print(f, "is composite.")
factors.append(f)
if isprime(g):
if verbose: print(g, "is prime.")
yield g
else:
if verbose: print(g, "is composite.")
factors.append(g)