Skip to content

Commit 86cc0e5

Browse files
authored
Merge pull request #72 from Treecodes/develop
Develop
2 parents 83bba1b + 6b8e963 commit 86cc0e5

File tree

112 files changed

+5905
-1794
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+5905
-1794
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ BaryTree
1010
========
1111

1212
A work-in-progress library for fast computation of N-body interactions on multiple GPUs.
13-
BaryTree implements barycentric Lagrange and Hermite polynomial interpolation treecodes.
14-
The current code employs an OpenACC GPU implementation.
13+
BaryTree implements barycentric Lagrange and Hermite polynomial interpolation fast
14+
summation methods. The current code employs an OpenACC GPU implementation with MPI
15+
for distributed memory parallelization.
1516

1617

1718
Authors:

examples/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ if(BUILD_EXAMPLES)
1414
target_link_libraries(random_cube_reproducible_cpu PRIVATE BaryTree_cpu Zoltan_Interface)
1515
install(TARGETS random_cube_reproducible_cpu DESTINATION bin)
1616

17+
add_executable(run_readin_cpu run_readin.c ${AUX_SRCS})
18+
target_link_libraries(run_readin_cpu PRIVATE BaryTree_cpu Zoltan_Interface)
19+
install(TARGETS run_readin_cpu DESTINATION bin)
20+
1721
add_executable(test_BaryTreeInterface_cpu test_BaryTreeInterface.c)
1822
target_link_libraries(test_BaryTreeInterface_cpu PRIVATE BaryTree_cpu)
1923
install(TARGETS test_BaryTreeInterface_cpu DESTINATION bin)
@@ -27,6 +31,10 @@ if(BUILD_EXAMPLES)
2731
target_link_libraries(random_cube_reproducible_gpu PRIVATE BaryTree_gpu Zoltan_Interface)
2832
install(TARGETS random_cube_reproducible_gpu DESTINATION bin)
2933

34+
add_executable(run_readin_gpu run_readin.c ${AUX_SRCS})
35+
target_link_libraries(run_readin_gpu PRIVATE BaryTree_gpu Zoltan_Interface)
36+
install(TARGETS run_readin_gpu DESTINATION bin)
37+
3038
add_executable(test_BaryTreeInterface_gpu test_BaryTreeInterface.c)
3139
target_link_libraries(test_BaryTreeInterface_gpu PRIVATE BaryTree_gpu)
3240
install(TARGETS test_BaryTreeInterface_gpu DESTINATION bin)

examples/README.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,20 @@ The parameters that can be specified in the infile are as follows:
3030
| `num_particles` | Number of sources and targets. Its use is exclusive with the `num_sources` and `num_targets` parameters.
3131
| `num_sources` | Number of sources.
3232
| `num_targets` | Number of targets.
33-
| `order` | Order of polynomial interpolation.
33+
| `distribution` | Underlying particle distribution: `UNIFORM`, `GAUSSIAN`, `EXPONENTIAL`, `PLUMMER`, or `PLUMMER_SYMMETRIC`.
34+
| `degree` | Degree of polynomial interpolation.
3435
| `theta` | Multipole acceptance criterion (MAC).
35-
| `max_per_leaf` | Maximum number of particles per tree leaf.
36-
| `max_per_batch` | Maximum number of particles per batch.
37-
| `kernel_name` | Name of interaction kernel: `yukawa` or `coulomb`.
38-
| `approximation` | Type of polynomial: `lagrange` and `hermite`.
39-
| `size_check` | If the product of this parameter and the number of interpolation points in a cluster is greater than the number of particles in the cluster, then the interaction will be performed directly even if the MAC is accepted.
40-
| `run_direct` | Run direct calculation for error comparison: `on` or `off`.
41-
| `verbosity` | Determines verbosity level of output. `0` is quiet, `1` is verbose.
42-
| `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison.
36+
| `max_per_source_leaf` | Maximum number of particles per source tree leaf (or source batch, for `CLUSTER_PARTICLE`).
37+
| `max_per_target_leaf` | Maximum number of particles per target tree leaf (or target batch, for `PARTICLE_CLUSTER`).
38+
| `beta` | Automatic tuning accuracy parameter: a number in [0,1], where higher is more accurate.
39+
| `compute_type` | Type of treecode method: `CLUSTER_PARTICLE`, `PARTICLE_CLUSTER` (i.e. BLTC), or `CLUSTER_CLUSTER` (i.e. BLDTT).
40+
| `approximation` | Type of polynomial: `LAGRANGE` or `HERMITE`. `HERMITE` is incompatible with the `CLUSTER_CLUSTER` compute type.
41+
| `kernel_name` | Name of interaction kernel: `COULOMB`, `YUKAWA`, `REGULARIZED_COULOMB`, `REGULARIZED_YUKAWA`, `SIN_OVER_R`, `USER`.
4342
| `kernel_params` | Comma separated list of parameters for given kernel.
43+
| `run_direct` | Run direct calculation for error comparison: `ON` or `OFF`.
44+
| `verbosity` | Determines verbosity level of output: an integer `0`, `1`, `2`, or `3`, where higher means more output.
45+
| `slice` | Determines the proportion of target sites at which the direct calculation is performed for error comparison. For example, `10` means every 10th target is sampled.
46+
4447

4548
Note the difference between these executables:
4649

examples/example.in

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
num_sources 20000
22
num_targets 20000
3-
order 2
3+
degree 2
44
theta 0.9
5+
beta -1.0
56
size_check 0.0
67
max_per_source_leaf 100
78
max_per_target_leaf 100
89
kernel_name coulomb
910
kernel_params 1.0
1011
approximation lagrange
1112
compute_type particle-cluster
13+
distribution uniform
1214
run_direct 1
1315
slice 10
1416
verbosity 1

examples/random_cube.c

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,19 @@ int main(int argc, char **argv)
3333

3434
/* run parameters */
3535
int N, M, run_direct, slice;
36+
double xyz_limits[6];
37+
DISTRIBUTION distribution;
38+
PARTITION partition;
39+
int sample_size = 1000000;
40+
3641
struct RunParams *run_params = NULL;
37-
int sample_size = 10000;
42+
3843
FILE *fp = fopen(argv[1], "r");
39-
Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice);
44+
Params_Parse(fp, &run_params, &N, &M, &run_direct, &slice, xyz_limits, &distribution, &partition);
45+
46+
double xmin = xyz_limits[0], xmax = xyz_limits[1];
47+
double ymin = xyz_limits[2], ymax = xyz_limits[3];
48+
double zmin = xyz_limits[4], zmax = xyz_limits[5];
4049

4150
/* Zoltan variables */
4251
int rc;
@@ -94,15 +103,13 @@ int main(int argc, char **argv)
94103
time_t t = time(NULL);
95104
unsigned t_hashed = (unsigned) t;
96105
t_hashed = mrand * t_hashed + crand;
97-
srand(t_hashed ^ rank);
98-
srand(1);
106+
srandom(t_hashed ^ rank);
107+
//srandom(1);
99108

100109
for (int i = 0; i < sample_size; ++i) {
101-
mySources.x[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
102-
mySources.y[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
103-
mySources.z[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
104-
mySources.q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
105-
mySources.w[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
110+
mySources.x[i] = Point_Set_Init(distribution);
111+
mySources.y[i] = Point_Set_Init(distribution);
112+
mySources.z[i] = Point_Set_Init(distribution);
106113
mySources.myGlobalIDs[i] = (ZOLTAN_ID_TYPE)(rank*N + i);
107114

108115
mySources.b[i] = 1.0; // dummy weighting scheme
@@ -158,8 +165,6 @@ int main(int argc, char **argv)
158165
mySources.x[i] = mySources.x[mySources.numMyPoints-1];
159166
mySources.y[i] = mySources.y[mySources.numMyPoints-1];
160167
mySources.z[i] = mySources.z[mySources.numMyPoints-1];
161-
mySources.q[i] = mySources.q[mySources.numMyPoints-1];
162-
mySources.w[i] = mySources.w[mySources.numMyPoints-1];
163168
mySources.myGlobalIDs[i] = mySources.myGlobalIDs[mySources.numMyPoints-1];
164169
mySources.numMyPoints--;
165170
} else {
@@ -174,12 +179,12 @@ int main(int argc, char **argv)
174179
exit(0);
175180
}
176181

177-
double xmin = minval(mySources.x, mySources.numMyPoints);
178-
double ymin = minval(mySources.y, mySources.numMyPoints);
179-
double zmin = minval(mySources.z, mySources.numMyPoints);
180-
double xmax = maxval(mySources.x, mySources.numMyPoints);
181-
double ymax = maxval(mySources.y, mySources.numMyPoints);
182-
double zmax = maxval(mySources.z, mySources.numMyPoints);
182+
double zz_bound_x_min = minval(mySources.x, mySources.numMyPoints);
183+
double zz_bound_y_min = minval(mySources.y, mySources.numMyPoints);
184+
double zz_bound_z_min = minval(mySources.z, mySources.numMyPoints);
185+
double zz_bound_x_max = maxval(mySources.x, mySources.numMyPoints);
186+
double zz_bound_y_max = maxval(mySources.y, mySources.numMyPoints);
187+
double zz_bound_z_max = maxval(mySources.z, mySources.numMyPoints);
183188

184189

185190
Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids,
@@ -224,12 +229,23 @@ int main(int argc, char **argv)
224229
/* Generating sources and targets based on Zoltan bounding box */
225230

226231
for (int i = 0; i < sources->num; ++i) {
227-
sources->x[i] = ((double)rand()/(double)(RAND_MAX)) * (xmax-xmin) + xmin;
228-
sources->y[i] = ((double)rand()/(double)(RAND_MAX)) * (ymax-ymin) + ymin;
229-
sources->z[i] = ((double)rand()/(double)(RAND_MAX)) * (zmax-zmin) + zmin;
230-
sources->q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
231-
sources->w[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
232+
sources->x[i] = Point_Set(distribution, zz_bound_x_min, zz_bound_x_max) * (xmax-xmin) + xmin;
233+
sources->y[i] = Point_Set(distribution, zz_bound_y_min, zz_bound_y_max) * (ymax-ymin) + ymin;
234+
sources->z[i] = Point_Set(distribution, zz_bound_z_min, zz_bound_z_max) * (zmax-zmin) + zmin;
235+
236+
sources->q[i] = Point_Set(UNIFORM, -1., 1.);
237+
sources->w[i] = Point_Set(UNIFORM, -1., 1.);
238+
}
239+
240+
/*
241+
char points_file[256];
242+
sprintf(points_file, "points_rank_%d.csv", rank);
243+
FILE *points_fp = fopen(points_file, "w");
244+
for (int i = 0; i < sources->num; ++i) {
245+
fprintf(points_fp, "%e, %e, %e\n", sources->x[i], sources->y[i], sources->z[i]);
232246
}
247+
fclose(points_fp);
248+
*/
233249

234250
/* MPI-allocated target arrays for RMA use */
235251

@@ -241,10 +257,10 @@ int main(int argc, char **argv)
241257
/* Generating targets based on Zoltan bounding box */
242258

243259
for (int i = 0; i < targets->num; ++i) {
244-
targets->x[i] = ((double)rand()/(double)(RAND_MAX)) * (xmax-xmin) + xmin;
245-
targets->y[i] = ((double)rand()/(double)(RAND_MAX)) * (ymax-ymin) + ymin;
246-
targets->z[i] = ((double)rand()/(double)(RAND_MAX)) * (zmax-zmin) + zmin;
247-
targets->q[i] = ((double)rand()/(double)(RAND_MAX)) * 2. - 1.;
260+
targets->x[i] = Point_Set(distribution, zz_bound_x_min, zz_bound_x_max) * (xmax-xmin) + xmin;
261+
targets->y[i] = Point_Set(distribution, zz_bound_y_min, zz_bound_y_max) * (ymax-ymin) + ymin;
262+
targets->z[i] = Point_Set(distribution, zz_bound_z_min, zz_bound_z_max) * (zmax-zmin) + zmin;
263+
targets->q[i] = Point_Set(UNIFORM, -1., 1.);
248264
}
249265

250266
#ifdef OPENACC_ENABLED

0 commit comments

Comments
 (0)