Skip to content

Commit 82cddc4

Browse files
author
Sebastian Rinke
committed
Implemented preparing exiting particles to actually leave cell
1 parent 1755fc4 commit 82cddc4

File tree

1 file changed

+145
-103
lines changed

1 file changed

+145
-103
lines changed

Particles3D-vec/Particles3D.cpp.func_to_vectorize.c

+145-103
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
#include <assert.h>
55

66
#define CACHE_SIZE (10*1024*1024) // To flush cache between benchmarks
7-
#define NUM_MESH_CELLS (30*30) //(30*30)
7+
#define NXC 20
8+
#define NYC 15
9+
#define NZC 3
10+
#define NUM_MESH_CELLS (NXC*NYZ*NZC) //(20*15*3)
811
#define NUM_PCLS_PER_MESH_CELL 1024
912
#define NUM_THREADS_PER_MESH_CELL 1
1013
#define ALIGNMENT 64
@@ -20,6 +23,7 @@
2023
typedef struct mesh_cell
2124
{
2225
int num_pcls;
26+
__attribute__ ((align(64))) int cell_id_xyz[3];
2327
/*
2428
* Lengths need to be multiple of BLOCKSIZE,
2529
* i.e., add padding if necessary
@@ -37,6 +41,8 @@ typedef struct mesh_cell
3741
__attribute__ ((align(64))) double field_component_5[6];
3842
__attribute__ ((align(64))) double field_component_6[6];
3943
__attribute__ ((align(64))) double field_component_7[6];
44+
int *exit_cells;
45+
double *exit_pcls;
4046
} Mesh_cell;
4147

4248
inline double time_sec();
@@ -45,7 +51,7 @@ void move_bucket_old();
4551
void move_bucket_new();
4652
void move_bucket_new_blocked();
4753
void move_bucket_new_blocked_sort();
48-
inline unsigned int calc_cell_idx(double x, double y, double z);
54+
inline int calc_cell_idx(double x, double y, double z);
4955

5056
/*
5157
* Global variables
@@ -57,7 +63,8 @@ Mesh_cell *mesh_cells;
5763
// Array to flush caches
5864
char volatile *cache;
5965

60-
// Member variables (random values)
66+
// Member variables (partly random values)
67+
int nxc = NXC, nyc = NYC, nzc = NZC;
6168
double xstart = 0, ystart = 0, zstart = 0;
6269
double inv_dx = .25, inv_dy = .25, inv_dz = .25;
6370
double dto2 = .4;
@@ -74,9 +81,10 @@ double *field_components[8];
7481

7582
int main(void)
7683
{
77-
double *q, *x, *y, *z, *u, *v, *w, *_xavg, *_yavg, *_zavg;
84+
double *q, *x, *y, *z, *u, *v, *w, *_xavg, *_yavg, *_zavg, *exit_pcls;
7885
long long *ParticleID;
79-
int num_blocks;
86+
int *exit_cells;
87+
int size, num_blocks;
8088
int pidx, i, j;
8189
void *ptr;
8290

@@ -96,89 +104,106 @@ int main(void)
96104
/*
97105
* Fill mesh cells
98106
*/
99-
for (i = 0; i < NUM_MESH_CELLS; i++)
100-
{
101-
int size;
107+
for (cid_x = 0; cid_x < nxc; cid_x++) {
108+
for (cid_y = 0; cid_y < nyc; cid_y++) {
109+
for (cid_z = 0; cid_z < nzc; cid_z++) {
110+
111+
i = cell_xyz_to_idx(cidx_x, cidx_y, cidx_z);
112+
113+
num_pcls = NUM_PCLS_PER_MESH_CELL;
114+
/*
115+
* Make sure that size of memory allocated
116+
* is multiple of BLOCKSIZE particles
117+
*/
118+
num_blocks = (num_pcls-1) / BLOCKSIZE + 1;
119+
size = num_blocks * BLOCKSIZE * sizeof(double);
120+
121+
/* Particles */
122+
ParticleID = ptr = _mm_malloc(num_blocks * BLOCKSIZE * sizeof(long long), ALIGNMENT);
123+
assert(ptr);
124+
q = ptr = _mm_malloc(size, ALIGNMENT);
125+
assert(ptr);
126+
x = ptr = _mm_malloc(size, ALIGNMENT);
127+
assert(ptr);
128+
y = ptr = _mm_malloc(size, ALIGNMENT);
129+
assert(ptr);
130+
z = ptr = _mm_malloc(size, ALIGNMENT);
131+
assert(ptr);
132+
u = ptr = _mm_malloc(size, ALIGNMENT);
133+
assert(ptr);
134+
v = ptr = _mm_malloc(size, ALIGNMENT);
135+
assert(ptr);
136+
w = ptr = _mm_malloc(size, ALIGNMENT);
137+
assert(ptr);
138+
_xavg = ptr = _mm_malloc(size, ALIGNMENT);
139+
assert(ptr);
140+
_yavg = ptr = _mm_malloc(size, ALIGNMENT);
141+
assert(ptr);
142+
_zavg = ptr = _mm_malloc(size, ALIGNMENT);
143+
assert(ptr);
144+
exit_cells = ptr = _mm_malloc(num_blocks * BLOCKSIZE * sizeof(int), ALIGNMENT);
145+
assert(ptr);
146+
exit_pcls = ptr = _mm_malloc(size, ALIGNMENT);
147+
assert(ptr);
148+
149+
/* Init cell's x, y, z coordinates (local to subdomain) */
150+
mesh_cells[i].cell_id_xyz[0] = cid_x;
151+
mesh_cells[i].cell_id_xyz[1] = cid_y;
152+
mesh_cells[i].cell_id_xyz[2] = cid_z;
153+
154+
/* Init particles */
155+
for (pidx = 0; pidx < num_pcls; pidx++)
156+
{
157+
ParticleID[pidx] = (long long) random();
158+
q[pidx] = (double) random();
159+
x[pidx] = (double) random();
160+
y[pidx] = (double) random();
161+
z[pidx] = (double) random();
162+
u[pidx] = (double) random();
163+
v[pidx] = (double) random();
164+
w[pidx] = (double) random();
165+
_xavg[pidx] = (double) random();
166+
_yavg[pidx] = (double) random();
167+
_zavg[pidx] = (double) random();
168+
}
169+
/* Init fields */
170+
for (j = 0; j < 6; j++) {
171+
mesh_cells[i].field_component_0[j] = (double) random();
172+
mesh_cells[i].field_component_1[j] = (double) random();
173+
mesh_cells[i].field_component_2[j] = (double) random();
174+
mesh_cells[i].field_component_3[j] = (double) random();
175+
mesh_cells[i].field_component_4[j] = (double) random();
176+
mesh_cells[i].field_component_5[j] = (double) random();
177+
mesh_cells[i].field_component_6[j] = (double) random();
178+
mesh_cells[i].field_component_7[j] = (double) random();
179+
}
102180

103-
num_pcls = NUM_PCLS_PER_MESH_CELL;
104-
/*
105-
* Make sure that size of memory allocated
106-
* is multiple of BLOCKSIZE particles
107-
*/
108-
num_blocks = (num_pcls-1) / BLOCKSIZE + 1;
109-
size = num_blocks * BLOCKSIZE * sizeof(double);
110-
111-
/* Particles */
112-
ParticleID = ptr = _mm_malloc(num_blocks * BLOCKSIZE * sizeof(long long), ALIGNMENT);
113-
assert(ptr);
114-
q = ptr = _mm_malloc(size, ALIGNMENT);
115-
assert(ptr);
116-
x = ptr = _mm_malloc(size, ALIGNMENT);
117-
assert(ptr);
118-
y = ptr = _mm_malloc(size, ALIGNMENT);
119-
assert(ptr);
120-
z = ptr = _mm_malloc(size, ALIGNMENT);
121-
assert(ptr);
122-
u = ptr = _mm_malloc(size, ALIGNMENT);
123-
assert(ptr);
124-
v = ptr = _mm_malloc(size, ALIGNMENT);
125-
assert(ptr);
126-
w = ptr = _mm_malloc(size, ALIGNMENT);
127-
assert(ptr);
128-
_xavg = ptr = _mm_malloc(size, ALIGNMENT);
129-
assert(ptr);
130-
_yavg = ptr = _mm_malloc(size, ALIGNMENT);
131-
assert(ptr);
132-
_zavg = ptr = _mm_malloc(size, ALIGNMENT);
133-
assert(ptr);
134-
135-
/* Init particles */
136-
for (pidx = 0; pidx < num_pcls; pidx++)
137-
{
138-
ParticleID[pidx] = (long long) random();
139-
q[pidx] = (double) random();
140-
x[pidx] = (double) random();
141-
y[pidx] = (double) random();
142-
z[pidx] = (double) random();
143-
u[pidx] = (double) random();
144-
v[pidx] = (double) random();
145-
w[pidx] = (double) random();
146-
_xavg[pidx] = (double) random();
147-
_yavg[pidx] = (double) random();
148-
_zavg[pidx] = (double) random();
149-
}
150-
/* Init fields */
151-
for (j = 0; j < 6; j++) {
152-
mesh_cells[i].field_component_0[j] = (double) random();
153-
mesh_cells[i].field_component_1[j] = (double) random();
154-
mesh_cells[i].field_component_2[j] = (double) random();
155-
mesh_cells[i].field_component_3[j] = (double) random();
156-
mesh_cells[i].field_component_4[j] = (double) random();
157-
mesh_cells[i].field_component_5[j] = (double) random();
158-
mesh_cells[i].field_component_6[j] = (double) random();
159-
mesh_cells[i].field_component_7[j] = (double) random();
181+
mesh_cells[i].num_pcls = num_pcls;
182+
mesh_cells[i].ParticleID = ParticleID;
183+
mesh_cells[i].q = q;
184+
mesh_cells[i].x = x;
185+
mesh_cells[i].y = y;
186+
mesh_cells[i].z = z;
187+
mesh_cells[i].u = u;
188+
mesh_cells[i].v = v;
189+
mesh_cells[i].w = w;
190+
mesh_cells[i]._xavg = _xavg;
191+
mesh_cells[i]._yavg = _yavg;
192+
mesh_cells[i]._zavg = _zavg;
193+
mesh_cells[i].exit_cells = exit_cells;
194+
mesh_cells[i].exit_pcls = exit_pcls;
195+
196+
ALIGNED(mesh_cells[i].ParticleID);
197+
ALIGNED(mesh_cells[i].q);
198+
ALIGNED(mesh_cells[i].x); ALIGNED(mesh_cells[i].y); ALIGNED(mesh_cells[i].z);
199+
ALIGNED(mesh_cells[i].u); ALIGNED(mesh_cells[i].v); ALIGNED(mesh_cells[i].w);
200+
ALIGNED(mesh_cells[i]._xavg);
201+
ALIGNED(mesh_cells[i]._yavg);
202+
ALIGNED(mesh_cells[i]._zavg);
203+
ALIGNED(mesh_cells[i].exit_cells);
204+
ALIGNED(mesh_cells[i].exit_pcls);
205+
}
160206
}
161-
162-
mesh_cells[i].num_pcls = num_pcls;
163-
mesh_cells[i].ParticleID = ParticleID;
164-
mesh_cells[i].q = q;
165-
mesh_cells[i].x = x;
166-
mesh_cells[i].y = y;
167-
mesh_cells[i].z = z;
168-
mesh_cells[i].u = u;
169-
mesh_cells[i].v = v;
170-
mesh_cells[i].w = w;
171-
mesh_cells[i]._xavg = _xavg;
172-
mesh_cells[i]._yavg = _yavg;
173-
mesh_cells[i]._zavg = _zavg;
174-
175-
ALIGNED(mesh_cells[i].ParticleID);
176-
ALIGNED(mesh_cells[i].q);
177-
ALIGNED(mesh_cells[i].x); ALIGNED(mesh_cells[i].y); ALIGNED(mesh_cells[i].z);
178-
ALIGNED(mesh_cells[i].u); ALIGNED(mesh_cells[i].v); ALIGNED(mesh_cells[i].w);
179-
ALIGNED(mesh_cells[i]._xavg);
180-
ALIGNED(mesh_cells[i]._yavg);
181-
ALIGNED(mesh_cells[i]._zavg);
182207
}
183208

184209

@@ -237,6 +262,9 @@ void move_bucket_new_blocked_sort()
237262
double *restrict x, *restrict y, *restrict z;
238263
double *restrict u, *restrict v, *restrict w;
239264
double *restrict _xavg, *restrict _yavg, *restrict _zavg;
265+
int *restrict exit_cells;
266+
double *restrict exit_pcls;
267+
int *restrict cell_id_xyz;
240268
// For weights calculation
241269
double abs_pos[3];
242270
double rel_pos[3];
@@ -263,11 +291,15 @@ void move_bucket_new_blocked_sort()
263291
__attribute__ ((align(64))) double Exl[BLOCKSIZE];
264292
__attribute__ ((align(64))) double Eyl[BLOCKSIZE];
265293
__attribute__ ((align(64))) double Ezl[BLOCKSIZE];
266-
// For what is left
294+
// For pushing
267295
double qdto2mc;
268296
double omsq, denom;
269297
double t[3], Om[3], avg[3];
270298
double udotOm;
299+
// For sorting
300+
int new_pcidx[BLOCKSIZE];
301+
int exit_cells_idx, exit_pcls_idx;
302+
int gap_idx;
271303
// Timing
272304
double time[6];
273305

@@ -286,11 +318,14 @@ void move_bucket_new_blocked_sort()
286318
BxyzExyz_l[5] = Ezl;
287319

288320
/* 1 thread works on one cell */
289-
#pragma omp for schedule(static,1) nowait
321+
#pragma omp for schedule(static,1)
290322
for (i = 0; i < NUM_MESH_CELLS; i++)
291323
{
292324
//printf("i: %d\n", i); fflush(stdout);
293-
cidx = i; // Cell index
325+
cidx = i; // Cell index
326+
exit_cells_idx = 1; // 1st idx where to start filling exit_cells[]
327+
exit_pcls_idx = 0; // 1st idx where to start filling exit_pcls[]
328+
gap_idx = 0; // 1st idx where gap in pcl arrays has to be filled
294329

295330
/* Set shortcuts to mesh cell's data */
296331
ParticleID = mesh_cells[cidx].ParticleID; ALIGNED(ParticleID);
@@ -304,6 +339,9 @@ void move_bucket_new_blocked_sort()
304339
_xavg = mesh_cells[cidx]._xavg; ALIGNED(_xavg);
305340
_yavg = mesh_cells[cidx]._yavg; ALIGNED(_yavg);
306341
_zavg = mesh_cells[cidx]._zavg; ALIGNED(_zavg);
342+
exit_cells = mesh_cells[cidx].exit_cells; ALIGNED(exit_cells);
343+
exit_pcls = mesh_cells[cidx].exit_pcls; ALIGNED(exit_pcls);
344+
cell_id_xyz = mesh_cells[cidx].cell_id_xyz; ALIGNED(cell_id_xyz);
307345

308346
/* Field components for this cell */
309347
field_component_0 = mesh_cells[cidx].field_component_0; ALIGNED(field_component_0);
@@ -359,9 +397,9 @@ void move_bucket_new_blocked_sort()
359397
//const int iy = cy + 1;
360398
//const int iz = cz + 1;
361399
// fraction of the distance from the right of the cell
362-
w1[0] = cx - cm1_pos[0];
363-
w1[1] = cy - cm1_pos[1];
364-
w1[2] = cz - cm1_pos[2];
400+
w1[0] = cell_id_xyz[0] - cm1_pos[0];
401+
w1[1] = cell_id_xyz[1] - cm1_pos[1];
402+
w1[2] = cell_id_xyz[2] - cm1_pos[2];
365403
// fraction of distance from the left
366404
w0[0] = 1-w1[0];
367405
w0[1] = 1-w1[1];
@@ -411,7 +449,7 @@ void move_bucket_new_blocked_sort()
411449
//time[3] = time_sec();
412450
#if 1
413451

414-
/* Do what is left */
452+
/* Push particles */
415453
//#pragma vector nontemporal (_xavg,_yavg,_zavg)
416454
for (j = 0, pidx = cpidx; j < BLOCKSIZE; j++, pidx++) {
417455
Om[0] = qdto2mc * Bxl[j];
@@ -515,13 +553,14 @@ void move_bucket_new_blocked_sort()
515553
exit_cells_idx++; // Point to next empty slot for cell idx
516554
}
517555

518-
519-
- Init gap idx to 0 (location of 1st pcl), increment with next pcl if required
520-
(- Pcls must be sorted before we enter pusher)
521-
- calc cell idx (integer value) XXX
522-
- gather pcl data into leave array XXX
523-
- close gaps in array of pcls that stay XXX
524-
- Store exit counter in exit_cells[] XXX
556+
/*
557+
- Init gap idx to 0 (location of 1st pcl), increment with next pcl if required XXX
558+
(- Pcls must be sorted before we enter pusher)
559+
- calc cell idx (integer value) XXX
560+
- gather pcl data into leave array XXX
561+
- close gaps in array of pcls that stay XXX
562+
- Store exit counter in exit_cells[] XXX
563+
*/
525564
}
526565

527566
#endif
@@ -538,7 +577,10 @@ void move_bucket_new_blocked_sort()
538577
#endif
539578
}
540579
/* Every cell collects its incoming particles */
541-
- Update num_pcls in mesh_cell
580+
/*
581+
* TODO
582+
- Update num_pcls in mesh_cell
583+
*/
542584
}
543585
time[1] = time_sec();
544586
printf("move_bucket_new_blocked_sort: total: %f\n", time[1] - time[0]);
@@ -1240,7 +1282,7 @@ void flush_cache()
12401282
cache[i] = (cache[i] + i) % 256;
12411283
}
12421284

1243-
inline unsigned int calc_cell_idx(double xpos, double ypos, double zpos)
1285+
inline int calc_cell_idx(double xpos, double ypos, double zpos)
12441286
{
12451287
int cx, cy, cz;
12461288

@@ -1263,7 +1305,7 @@ inline unsigned int calc_cell_idx(double xpos, double ypos, double zpos)
12631305
cz = (cz >= nzc) ? nzc-1 : cz;
12641306

12651307
// cx * nyc * nzc + cy * nzc + cz
1266-
return (unsigned int) ((cx * nyc + cy) * nzc + cz);
1308+
return (cx * nyc + cy) * nzc + cz;
12671309
}
12681310

12691311

0 commit comments

Comments
 (0)