-
Notifications
You must be signed in to change notification settings - Fork 0
/
hail64p.c
173 lines (173 loc) · 3.89 KB
/
hail64p.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>
#include "hailstone.h"
/*
* 64-bit wide integer search using polynomials
*/
#if CHECK_MAXVALUE
#if NO_UPDATE
void hail64pmf
#else
void hail64pm
#endif
#elif CHECK_MAXVALUE_PARENTS
#if NO_UPDATE
void hail64plf
#else
void hail64pl
#endif
#else
#if NO_UPDATE
void hail64pnf
#else
void hail64pn
#endif
#endif
(uint32_t *n, // input: number to search
int32_t *steps, // input/output: # steps
int32_t *maxsteps, // input/output: maximum
uint32_t *maxvalue,int32_t maxvalue_size // input/output: maximum
#if NO_UPDATE
,int32_t *peak_found
#endif
)
{
int32_t lsteps = *steps;
int32_t lmaxsteps = *maxsteps;
uint64_t num = n[0];
uint64_t num2 = n[1];
uint64_t num3;
uint64_t max = maxvalue[0];
uint64_t max2 = (maxvalue_size > 2)?0xfffffffffffffffflu : maxvalue[1];
int clz;
int nsize;
struct poly *ptab;
#if DEBUG
if (debug){
printf("%s(%lu %lu)\n",__FUNCTION__,num2,num);
}
#endif
while (num2 > 0){
#if DEBUG
if (debug){ printf("\t%d: %lu %lu\n",lsteps,num2,num); }
#endif
#if CHECK_MAXVALUE
ptab = &mpoly10[num & ((1<<POLY_WIDTH)-1)];
#else
ptab = &fpoly10[num & ((1<<POLY_WIDTH)-1)];
#endif
if (num2 < 65536){
num = num & ~((1<<POLY_WIDTH)-1);
num = (num2 << 32)|num;
num = num >> ptab->div2;
num = num * ptab->mul3 + ptab->add;
num2 = num >> 32;
num = num & 0xffffffff;
num3 = 0;
lsteps += ptab->steps;
} else {
num = num & ~((1<<POLY_WIDTH)-1); // get rid of bottom bits
// divide by power of 2
num = (num >> ptab->div2)|((num2 & ((1<<ptab->div2)-1))<<(32 - ptab->div2));
num2 = num2 >> ptab->div2;
// multiply by power of 3
num = num * ptab->mul3 + ptab->add;
num2 = num2 * ptab->mul3 + (num >> 32);
num = num & 0xffffffff;
num3 = num2 >> 32;
lsteps += ptab->steps;
if (num3){
num2 = num2 & 0xffffffff;
n[0] = num;
n[1] = num2;
n[2] = num3;
*steps = lsteps;
nsize = 3;
#if CHECK_MAXVALUE
if ((maxvalue_size == 2)||
((maxvalue_size == 3) &&
((num3 > maxvalue[2])||
((num3 == maxvalue[2])&&
((num2 > maxvalue[1])||
((num2 == maxvalue[1])&&
(num > maxvalue[0]))))))){
#if NO_UPDATE
*peak_found = 1;
n[0] = 1;
n[1] = 0;
n[2] = 0;
return;
#else
global_maxvalue_found = 1;
global_maxvalue[0] = maxvalue[0] = num;
global_maxvalue[1] = maxvalue[1] = num2;
global_maxvalue[2] = maxvalue[2] = num3;
global_maxvalue_size = maxvalue_size = 3;
#endif
}
#if NO_UPDATE
hailxmf(n,nsize,steps,maxsteps,maxvalue,maxvalue_size,peak_found);
#else
hailxm(n,nsize,steps,maxsteps,maxvalue,maxvalue_size);
#endif
maxvalue[0] = global_maxvalue[0];
maxvalue[1] = global_maxvalue[1];
maxvalue[2] = global_maxvalue[2];
maxvalue_size = global_maxvalue_size;
#else
#if NO_UPDATE
hailxnf(n,nsize,steps,maxsteps,maxvalue,maxvalue_size,peak_found);
#else
hailxn(n,nsize,steps,maxsteps,maxvalue,maxvalue_size);
#endif
#endif
lsteps = *steps;
lmaxsteps = *maxsteps;
num = n[0];
num2 = n[1];
max2 = 0xfffffffffffffffflu;
nsize = 2;
continue;
}
}
#if CHECK_MAXVALUE
if ((num2 > max2)||
((num2 == max2) && (num > max))){
#if NO_UPDATE
*peak_found = 1;
// do nothing
#else
global_maxvalue_found = 1;
global_maxvalue[0] = max = num;
global_maxvalue[1] = max2 = num2;
global_maxvalue_size = 2;
#endif
}
#endif
#if !CHECK_MAXVALUE
// clz check
clz = __lzcnt64((num2<<32)|num);
if (clz64[clz] + lsteps < lmaxsteps){
n[0] = 1;
n[1] = 0;
*steps = lsteps;
return;
}
#endif
}
n[0] = num;
n[1] = num2;
*steps = lsteps;
#if CHECK_MAXSTEPS
if (lsteps > *maxsteps){
#if NO_UPDATE
*peak_found = 1;
#else
global_maxsteps_found = 1;
global_maxsteps = *maxsteps = lsteps;
#endif
}
#endif
return;
}