Skip to content

Commit

Permalink
Refactors to improve performance (#257)
Browse files Browse the repository at this point in the history
* Simplifies abeles and resolution routines

* Removes if statements from abeles routines

* Reduces memory allocation

* Adds notImplemented exception to tests

* Reduces memory allocation for input compile arguments

* Restores original memory allocation

* Resolves memory allocation for packParams arrays

* Resolves memory allocation issues

* Addresses review comment
  • Loading branch information
DrPaulSharp authored Aug 22, 2024
1 parent 20c9130 commit 113a952
Show file tree
Hide file tree
Showing 38 changed files with 242 additions and 288 deletions.
64 changes: 30 additions & 34 deletions API/makeEmptyResultStruct.m
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
% fitNames: [nParamsx1 cell]

% -----------------------------------------------------------
maxArraySize = 10000;

% Make the individual structs....
% (1) result.calculationResults

Expand All @@ -35,19 +37,19 @@
% (2) result.contrastParams

backgroundParams = zeros(nContrasts,1);
coder.varsize('backgroundParams',[10000 1],[1 0]);
coder.varsize('backgroundParams',[maxArraySize 1],[1 0]);
scalefactors = zeros(nContrasts,1);
coder.varsize('scalefactors',[10000 1],[1 0]);
coder.varsize('scalefactors',[maxArraySize 1],[1 0]);
bulkIn = zeros(nContrasts,1);
coder.varsize('bulkIn',[10000 1],[1 0]);
coder.varsize('bulkIn',[maxArraySize 1],[1 0]);
bulkOut = zeros(nContrasts,1);
coder.varsize('bulkOut',[10000 1],[1 0]);
coder.varsize('bulkOut',[maxArraySize 1],[1 0]);
resolutionParams = zeros(nContrasts,1);
coder.varsize('resolutionParams',[10000 1],[1 0]);
coder.varsize('resolutionParams',[maxArraySize 1],[1 0]);
subRoughs = zeros(nContrasts,1);
coder.varsize('subRoughs',[10000 1],[1 0]);
coder.varsize('subRoughs',[maxArraySize 1],[1 0]);
resample = zeros(1, nContrasts);
coder.varsize('resample',[1 10000],[0 1]);
coder.varsize('resample',[1 maxArraySize],[0 1]);

contrastParams = struct('backgroundParams', backgroundParams, ...
'scalefactors', scalefactors, ...
Expand All @@ -61,84 +63,78 @@
% Make the final structure...

reflectivity = cell(nContrasts,1);
refCell = [1 1; 1 1];
refCell = ones(2,2);
coder.varsize('refCell',[10000 2],[1 0]);
for i = 1:nContrasts
reflectivity{i} = refCell;
end

simulation = cell(nContrasts,1);
simCell = [1 1; 1 1];
simCell = ones(2,2);
coder.varsize('simCell',[10000 2],[1 0]);
for i = 1:nContrasts
simulation{i} = simCell;
end

shiftedData = cell(nContrasts,1);
shiftCell = [1 1 1; 1 1 1];
shiftCell = ones(2,3);
coder.varsize('shiftCell',[10000 3],[1 0]);
for i = 1:nContrasts
shiftedData{i} = shiftCell;
end


layerSldCell = ones(2,3);
coder.varsize('layerSldCell',[10000 6],[1 1]);
if domains
layerSlds = cell(nContrasts,2);
domainLayerSldCell = [1 1 1; 1 1 1];
coder.varsize('domainLayerSldCell',[10000 6],[1 1]);
for i = 1:nContrasts
layerSlds{i,1} = domainLayerSldCell;
layerSlds{i,2} = domainLayerSldCell;
layerSlds{i,1} = layerSldCell;
layerSlds{i,2} = layerSldCell;
end
else
layerSlds = cell(nContrasts,1);
layerSldCell = [1 1 1; 1 1 1];
coder.varsize('layerSldCell',[10000 6],[1 1]);
for i = 1:nContrasts
layerSlds{i} = layerSldCell;
end
end


sldProfileCell = ones(2,2);
coder.varsize('sldProfileCell',[10000 2],[1 0]);
if domains
sldProfiles = cell(nContrasts,2);
domainSldProfileCell = [1 1; 1 1];
coder.varsize('domainSldProfileCell',[10000 inf],[1 1]);
for i = 1:nContrasts
sldProfiles{i,1} = domainSldProfileCell;
sldProfiles{i,2} = domainSldProfileCell;
sldProfiles{i,1} = sldProfileCell;
sldProfiles{i,2} = sldProfileCell;
end
else
sldProfiles = cell(nContrasts,1);
sldProfileCell = [1 1; 1 1];
coder.varsize('sldProfileCell',[10000 2],[1 0]);

for i = 1:nContrasts
sldProfiles{i,1} = sldProfileCell;
sldProfiles{i} = sldProfileCell;
end
end

resampledLayersCell = ones(2,3);
coder.varsize('resampledLayersCell',[10000 3],[1 0]);
if domains
resampledLayers = cell(nContrasts,2);
domainResampledLayersCell = [1 1 1; 1 1 1];
coder.varsize('domainResampledLayersCell',[10000 3],[1 0]);
for i = 1:nContrasts
resampledLayers{i,1} = domainResampledLayersCell;
resampledLayers{i,2} = domainResampledLayersCell;
resampledLayers{i,1} = resampledLayersCell;
resampledLayers{i,2} = resampledLayersCell;
end
else
resampledLayers = cell(nContrasts,1);
resampledLayersCell = [1 1 1; 1 1 1];
coder.varsize('resampledLayersCell',[10000 3],[1 0]);
for i = 1:nContrasts
resampledLayers{i} = resampledLayersCell;
end
end

fitParams = zeros(1,nParams);
coder.varsize('fitParams',[1 10000],[0 1]);
fitParams = zeros(nParams,1);
coder.varsize('fitParams',[maxArraySize 1],[1 0]);

fitNames = cell(nParams,1);
fitNamesChar = '';
coder.varsize('fitNamesChar',[1 1000],[0 1]);
coder.varsize('fitNamesChar',[1 maxArraySize],[0 1]);
for i = 1:nParams
fitNames{i} = fitNamesChar;
end
Expand Down
85 changes: 43 additions & 42 deletions compile/fullCompile/makeCompileArgsFull.m
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
% Define the arguments for compiling RATMain
% using codegen.

%% Define argument types for entry-point 'reflectivityCalculation'.
%% Define argument types for entry-point 'RATMain'.
maxArraySize = 10000;
maxDataSize = 10000;

ARGS = cell(1,1);
ARGS{1} = cell(5,1);
Expand Down Expand Up @@ -36,15 +37,15 @@
ARGS_1_1.contrastDomainRatios = coder.typeof(0,[1 maxArraySize],[0 1]);
ARGS_1_1.domainRatio = coder.typeof(0,[1 maxArraySize],[0 1]);
ARGS_1_1.numberOfDomainContrasts = coder.typeof(0);
ARGS_1_1.fitParams = coder.typeof(0,[maxArraySize maxArraySize],[1 1]);
ARGS_1_1.otherParams = coder.typeof(0,[maxArraySize maxArraySize],[1 1]);
ARGS_1_1.fitLimits = coder.typeof(0,[maxArraySize maxArraySize],[1 1]);
ARGS_1_1.otherLimits = coder.typeof(0,[maxArraySize maxArraySize],[1 1]);
ARGS_1_1.fitParams = coder.typeof(0,[maxArraySize 1],[1 0]);
ARGS_1_1.otherParams = coder.typeof(0,[maxArraySize 1],[1 0]);
ARGS_1_1.fitLimits = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_1.otherLimits = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS{1}{1} = coder.typeof(ARGS_1_1);
ARGS_1_2 = cell([1 21]);
ARG = coder.typeof(0,[1 2]);
ARGS_1_2{1} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
ARG = coder.typeof(0,[maxArraySize 5],[1 1]);
ARG = coder.typeof(0,[maxDataSize 5],[1 1]);
ARGS_1_2{2} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
ARG = coder.typeof(0,[1 2]);
ARGS_1_2{3} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
Expand All @@ -53,7 +54,7 @@
ARG = coder.typeof(0,[1 maxArraySize],[1 1]);
ARGS_1_2{5} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
ARG = coder.typeof(0,[1 10],[1 1]);
ARGS_1_2{6} = coder.typeof({ARG}, [maxArraySize 1],[1 0]);
ARGS_1_2{6} = coder.typeof({ARG}, [maxArraySize 1],[1 0]);
ARG = coder.typeof('X',[1 maxArraySize],[0 1]);
ARGS_1_2{7} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
ARG = coder.typeof('X',[1 maxArraySize],[0 1]);
Expand Down Expand Up @@ -87,14 +88,14 @@
ARGS{1}{2} = coder.typeof(ARGS_1_2,[1 21]);
ARGS{1}{2} = ARGS{1}{2}.makeHeterogeneous();
ARGS_1_3 = struct;
ARGS_1_3.param = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.backgroundParam = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.scalefactor = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.qzshift = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.bulkIn = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.bulkOut = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.resolutionParam = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.domainRatio = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.param = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.backgroundParam = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.scalefactor = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.qzshift = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.bulkIn = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.bulkOut = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.resolutionParam = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.domainRatio = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS{1}{3} = coder.typeof(ARGS_1_3);
ARGS_1_4 = struct;
ARGS_1_4.procedure = coder.typeof('X',[1 maxArraySize],[0 1]);
Expand Down Expand Up @@ -138,72 +139,72 @@
ARGS{1}{4} = coder.typeof(ARGS_1_4);
ARGS_1_5 = struct;
ARG_20 = cell([1 4]);
ARG_20{1} = coder.typeof('X',[1 Inf],[0 1]);
ARG_20{2} = coder.typeof('X',[1 Inf],[0 1]);
ARG_20{1} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_20{2} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_20{3} = coder.typeof(0);
ARG_20{4} = coder.typeof(0);
ARG_20 = coder.typeof(ARG_20,[1 4]);
ARG_20 = ARG_20.makeHeterogeneous();
ARGS_1_5.param = coder.typeof({ARG_20}, [Inf 1],[1 0]);
ARGS_1_5.param = coder.typeof({ARG_20}, [maxArraySize 1],[1 0]);
ARG_21 = cell([1 4]);
ARG_21{1} = coder.typeof('X',[1 Inf],[0 1]);
ARG_21{2} = coder.typeof('X',[1 Inf],[0 1]);
ARG_21{1} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_21{2} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_21{3} = coder.typeof(0);
ARG_21{4} = coder.typeof(0);
ARG_21 = coder.typeof(ARG_21,[1 4]);
ARG_21 = ARG_21.makeHeterogeneous();
ARGS_1_5.backgroundParam = coder.typeof({ARG_21}, [Inf 1],[1 0]);
ARGS_1_5.backgroundParam = coder.typeof({ARG_21}, [maxArraySize 1],[1 0]);
ARG_22 = cell([1 4]);
ARG_22{1} = coder.typeof('X',[1 Inf],[0 1]);
ARG_22{2} = coder.typeof('X',[1 Inf],[0 1]);
ARG_22{1} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_22{2} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_22{3} = coder.typeof(0);
ARG_22{4} = coder.typeof(0);
ARG_22 = coder.typeof(ARG_22,[1 4]);
ARG_22 = ARG_22.makeHeterogeneous();
ARGS_1_5.resolutionParam = coder.typeof({ARG_22}, [Inf 1],[1 0]);
ARGS_1_5.resolutionParam = coder.typeof({ARG_22}, [maxArraySize 1],[1 0]);
ARG_23 = cell([1 4]);
ARG_23{1} = coder.typeof('X',[1 Inf],[0 1]);
ARG_23{2} = coder.typeof('X',[1 Inf],[0 1]);
ARG_23{1} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_23{2} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_23{3} = coder.typeof(0);
ARG_23{4} = coder.typeof(0);
ARG_23 = coder.typeof(ARG_23,[1 4]);
ARG_23 = ARG_23.makeHeterogeneous();
ARGS_1_5.bulkIn = coder.typeof({ARG_23}, [Inf 1],[1 0]);
ARGS_1_5.bulkIn = coder.typeof({ARG_23}, [maxArraySize 1],[1 0]);
ARG_24 = cell([1 4]);
ARG_24{1} = coder.typeof('X',[1 Inf],[0 1]);
ARG_24{2} = coder.typeof('X',[1 Inf],[0 1]);
ARG_24{1} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_24{2} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_24{3} = coder.typeof(0);
ARG_24{4} = coder.typeof(0);
ARG_24 = coder.typeof(ARG_24,[1 4]);
ARG_24 = ARG_24.makeHeterogeneous();
ARGS_1_5.bulkOut = coder.typeof({ARG_24}, [Inf 1],[1 0]);
ARGS_1_5.bulkOut = coder.typeof({ARG_24}, [maxArraySize 1],[1 0]);
ARG_25 = cell([1 4]);
ARG_25{1} = coder.typeof('X',[1 Inf],[0 1]);
ARG_25{2} = coder.typeof('X',[1 Inf],[0 1]);
ARG_25{1} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_25{2} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_25{3} = coder.typeof(0);
ARG_25{4} = coder.typeof(0);
ARG_25 = coder.typeof(ARG_25,[1 4]);
ARG_25 = ARG_25.makeHeterogeneous();
ARGS_1_5.qzshift = coder.typeof({ARG_25}, [Inf 1],[1 0]);
ARGS_1_5.qzshift = coder.typeof({ARG_25}, [maxArraySize 1],[1 0]);
ARG_26 = cell([1 4]);
ARG_26{1} = coder.typeof('X',[1 Inf],[0 1]);
ARG_26{2} = coder.typeof('X',[1 Inf],[0 1]);
ARG_26{1} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_26{2} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_26{3} = coder.typeof(0);
ARG_26{4} = coder.typeof(0);
ARG_26 = coder.typeof(ARG_26,[1 4]);
ARG_26 = ARG_26.makeHeterogeneous();
ARGS_1_5.scalefactor = coder.typeof({ARG_26}, [Inf 1],[1 0]);
ARGS_1_5.scalefactor = coder.typeof({ARG_26}, [maxArraySize 1],[1 0]);
% ARG_27 = coder.typeof(0);
ARG_27{1} = coder.typeof('X',[1 Inf],[0 1]);
ARG_27{2} = coder.typeof('X',[1 Inf],[0 1]);
ARG_27{1} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_27{2} = coder.typeof('X',[1 maxArraySize],[0 1]);
ARG_27{3} = coder.typeof(0);
ARG_27{4} = coder.typeof(0);
ARG_27 = coder.typeof(ARG_27,[1 4]);
ARG_27 = ARG_27.makeHeterogeneous();
ARGS_1_5.domainRatio = coder.typeof({ARG_27}, [Inf 1],[1 0]);
ARGS_1_5.domainRatio = coder.typeof({ARG_27}, [maxArraySize 1],[1 0]);
ARG_28 = coder.typeof('X',[1 Inf],[0 1]);
ARGS_1_5.priorNames = coder.typeof({ARG_28}, [Inf 1],[1 0]);
ARGS_1_5.priorValues = coder.typeof(0, [Inf 3], [1 0]);
ARGS_1_5.priorNames = coder.typeof({ARG_28}, [maxArraySize 1],[1 0]);
ARGS_1_5.priorValues = coder.typeof(0, [maxArraySize 3], [1 0]);
ARGS{1}{5} = coder.typeof(ARGS_1_5);

end
end
31 changes: 16 additions & 15 deletions compile/reflectivityCalculation/makeCompileArgs.m
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

%% Define argument types for entry-point 'reflectivityCalculation'.
maxArraySize = 10000;
maxDataSize = 10000;

ARGS = cell(1,1);
ARGS{1} = cell(3,1);
Expand Down Expand Up @@ -36,15 +37,15 @@
ARGS_1_1.contrastDomainRatios = coder.typeof(0,[1 maxArraySize],[0 1]);
ARGS_1_1.domainRatio = coder.typeof(0,[1 maxArraySize],[0 1]);
ARGS_1_1.numberOfDomainContrasts = coder.typeof(0);
ARGS_1_1.fitParams = coder.typeof(0,[maxArraySize maxArraySize],[1 1]);
ARGS_1_1.otherParams = coder.typeof(0,[maxArraySize maxArraySize],[1 1]);
ARGS_1_1.fitLimits = coder.typeof(0,[maxArraySize maxArraySize],[1 1]);
ARGS_1_1.otherLimits = coder.typeof(0,[maxArraySize maxArraySize],[1 1]);
ARGS_1_1.fitParams = coder.typeof(0,[maxArraySize 1],[1 0]);
ARGS_1_1.otherParams = coder.typeof(0,[maxArraySize 1],[1 0]);
ARGS_1_1.fitLimits = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_1.otherLimits = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS{1}{1} = coder.typeof(ARGS_1_1);
ARGS_1_2 = cell([1 21]);
ARG = coder.typeof(0,[1 2]);
ARGS_1_2{1} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
ARG = coder.typeof(0,[maxArraySize 5],[1 1]);
ARG = coder.typeof(0,[maxDataSize 5],[1 1]);
ARGS_1_2{2} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
ARG = coder.typeof(0,[1 2]);
ARGS_1_2{3} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
Expand All @@ -53,7 +54,7 @@
ARG = coder.typeof(0,[1 maxArraySize],[1 1]);
ARGS_1_2{5} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
ARG = coder.typeof(0,[1 10],[1 1]);
ARGS_1_2{6} = coder.typeof({ARG}, [maxArraySize 1],[1 0]);
ARGS_1_2{6} = coder.typeof({ARG}, [maxArraySize 1],[1 0]);
ARG = coder.typeof('X',[1 maxArraySize],[0 1]);
ARGS_1_2{7} = coder.typeof({ARG}, [1 maxArraySize],[0 1]);
ARG = coder.typeof('X',[1 maxArraySize],[0 1]);
Expand Down Expand Up @@ -87,14 +88,14 @@
ARGS{1}{2} = coder.typeof(ARGS_1_2,[1 21]);
ARGS{1}{2} = ARGS{1}{2}.makeHeterogeneous();
ARGS_1_3 = struct;
ARGS_1_3.param = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.backgroundParam = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.scalefactor = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.qzshift = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.bulkIn = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.bulkOut = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.resolutionParam = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.domainRatio = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.param = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.backgroundParam = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.scalefactor = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.qzshift = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.bulkIn = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.bulkOut = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.resolutionParam = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS_1_3.domainRatio = coder.typeof(0,[maxArraySize 2],[1 0]);
ARGS{1}{3} = coder.typeof(ARGS_1_3);
ARGS_1_4 = struct;
ARGS_1_4.procedure = coder.typeof('X',[1 maxArraySize],[0 1]);
Expand Down Expand Up @@ -137,4 +138,4 @@
ARGS_1_4.IPCFilePath = coder.typeof('X',[1 maxArraySize],[0 1]);
ARGS{1}{4} = coder.typeof(ARGS_1_4);

end
end
2 changes: 1 addition & 1 deletion minimisers/DE/runDE.m
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
coder.varsize('S_MSE.I_no',[1 1],[0 0]);
coder.varsize('S_MSE.FVr_oa',[1 1],[0 0]);

problemStruct.fitParams = p;
problemStruct.fitParams = p';
problemStruct = unpackParams(problemStruct,controls);
result = reflectivityCalculation(problemStruct,problemCells,problemLimits,controls);
fval = result.calculationResults.sumChi;
Expand Down
Loading

0 comments on commit 113a952

Please sign in to comment.