Skip to content

Commit

Permalink
Code compiles (under nvidia)
Browse files Browse the repository at this point in the history
  • Loading branch information
kif committed Nov 26, 2024
1 parent 7c737c6 commit f66755a
Showing 1 changed file with 12 additions and 14 deletions.
26 changes: 12 additions & 14 deletions src/pyFAI/resources/openCL/medfilt.cl
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ float2 inline sum_float2_reduction(local float* shared)
here.s1 = shared[pos_here+1];
there.s0 = shared[pos_there];
there.s1 = shared[pos_there+1];
shared[2*tid+1] = here.s1;
here =
+= shared[tid+stride];
here = dw_plus_dw(here, there);
shared[2*tid] = here.s0;
shared[2*tid+1] = here.s1;
}
Expand Down Expand Up @@ -113,7 +111,7 @@ csr_medfilt ( const global float4 *data4,
global float *averint,
global float *stdevpix,
global float *stderrmean,
local int* shared_int // size of the workgroup size
local int* shared_int, // size of the workgroup size
local float* shared_float // size of 2x the workgroup size
)
{
Expand All @@ -128,7 +126,7 @@ csr_medfilt ( const global float4 *data4,
char curr_error_model=error_model;
float8 result;
float sum=0.0f, ratio=1.3f;
float2 acc_sig, acc_nrm, acc_var;
float2 acc_sig, acc_nrm, acc_var, acc_nrm2;

// first populate the work4 array from data4
for (int i=start+tid; i<stop; i+=wg)
Expand All @@ -152,29 +150,29 @@ csr_medfilt ( const global float4 *data4,
step = first_step(step, size, ratio);

for (step=step; step>0; step=previous_step(step, ratio))
cnt = passe_float4(&work4[start], size, step, shared);
cnt = passe_float4(&work4[start], size, step, shared_int);

while (cnt)
cnt = passe_float4(&work4[start], size, 1, shared);
cnt = passe_float4(&work4[start], size, 1, shared_int);

// Then perform the cumsort of the weights to s0
// In blelloch scan, one workgroup can process 2wg in size.
niter = (size + 2*wg-1)/(2*wg);
sum = 0.0f;
for (int i=0; i<niter; i+=1)
for (int i=0; i<niter; i++)
{
idx += start + tid + 2*wg*i;
idx = start + tid + 2*wg*i;

shared_float[tid] = (idx<stop)?work4[idx].s3:0.0f;
shared_float[tid+wg] = ((idx+wg)<stop)?work4[idx+wg].s3:0.0f;

blelloch_scan_float(shared_float);

if (idx<size)
work4[idx].s0 = sum + shared[lid];
if (i+ws<size)
work4[idx+wg].s0 = sum + shared[lid+g];
sum += shared[2*ws-1];
if (idx<stop)
work4[idx].s0 = sum + shared_float[tid];
if ((i+wg)<stop)
work4[idx+wg].s0 = sum + shared_float[tid+wg];
sum += shared_float[2*wg-1];

}
// Perform the sum for accumulator of signal, variance, normalization and count
Expand Down

0 comments on commit f66755a

Please sign in to comment.