macsbio · chaitrasarathy · Jan 20, 2021 · Jan 19, 2021 · Jan 19, 2021 · Jan 19, 2021
diff --git a/ComMet.mlx b/ComMet.mlx
diff --git a/README.md b/README.md
@@ -7,6 +7,15 @@ ComMet is a method based on genome-scale metabolic models for comparing differen
 - identification of metabolically distinct network modules
 - visualization of network modules as reaction and metabolic map
 
+## Dependencies
+Please follow the instructions on the respective pages
+# Toolboxes
+- [COBRA](https://github.com/opencobra/cobratoolbox/)
+- [Icasso](https://research.ics.aalto.fi/ica/icasso/)
+# MATLAB functions  
+ - [insertrows](https://nl.mathworks.com/matlabcentral/fileexchange/9984-insertrows)
+ - [knee point](https://nl.mathworks.com/matlabcentral/fileexchange/35094-knee-point)
+
 ## Preprint
 Sarathy C et al., ComMet: A method for comparing metabolic states in genome-scale metabolic models. bioRxiv, Cold Spring Harbour Laboratory. 2020. https://doi.org/10.1101/2020.09.14.296145
 

diff --git a/src/ICA_N_optimization.m b/src/ICA_N_optimization.m
@@ -1,27 +1,35 @@
-% This script finds the optimum 'N' (or number of independent components
-% necessary for decomposition) for the dataset using a bootstrapping
-% approach
-
-% load PCs from both simulations
-
-% set decimal precision and filter reactions with low loadings
-pcs_all =  [round(rotatedComp_un_unnorm, 3), round(rotatedComp_bcaa0_unnorm, 3)];
-pcs_all_rem = filterLowLoadingsReactions(pcs_all)'; % 4061 to 1803 rxns
-
-% Reduce dimension from 1033 ordinal signals to 'numComps' (lastEig) in each iteration. 
-% Resample 100 (M) times using random initial conditions and bootstrapping
-% (use FastICA parameters: kurtosis as non-linearity,
-% symmetric estimation approach)
-
-numIter = 100;
-numComps = [2:1:101];
-
-for uu = 1:length(numComps)
-    tic;
-    boot_ica = icassoEst('both', pcs_all_rem, numIter, 'g', 'pow3', 'approach', 'symm', 'lastEig', numComps(uu));
-    end_time_boot = toc;
-    sR = icassoExp(boot_ica);
-    end_time_sR = toc;
-    save(['Results_nComps_',num2str(numComps(uu)),'.mat'],'boot_ica','sR',...
-        'end_time_boot','end_time_sR');
+% This script finds the optimum 'N' (or number of independent components
+% necessary for decomposition) for the dataset using a bootstrapping
+% approach
+
+% load PCs from both simulations
+
+% set decimal precision and filter reactions with low loadings
+pcs_all =  [round(rotatedComp_un_unnorm, 3), round(rotatedComp_bcaa0_unnorm, 3)];
+pcs_all_rem = filterLowLoadingsReactions(pcs_all)'; % 4061 to 1803 rxns
+
+% Reduce dimension from 1033 ordinal signals to 'numComps' (lastEig) in each iteration. 
+% Resample 100 (M) times using random initial conditions and bootstrapping
+% (use FastICA parameters: kurtosis as non-linearity,
+% symmetric estimation approach)
+
+numIter = 100;
+numComps = [2:1:90, 95, 100];
+
+for uu = 1:length(numComps)
+    failed = 'true';
+    while strcmp(failed, 'true')
+        try
+            tic;
+            boot_ica = icassoEst('both', pcs_all_rem, numIter, 'g', 'pow3', 'approach', 'symm', 'lastEig', numComps(uu));
+            end_time_boot = toc; 
+            failed = 'false';
+            break;
+        end
+    end    
+    sR = icassoExp(boot_ica);
+    end_time_sR = toc;
+    save(['Results_nComps_',num2str(numComps(uu)),'.mat'],'boot_ica','sR',...
+        'end_time_boot','end_time_sR');
+    clear('boot_ica','sR','end_time_boot','end_time_sR');
 end
diff --git a/src/ICA_iters.m b/src/ICA_iters.m
@@ -1,18 +1,19 @@
-function [ica_iters, end_time] = ICA_iters(pcs_all, optN, numIters)
-%UNTITLED2 Summary of this function goes here
-%   Detailed explanation goes here
-
-
-nn = 1:numIters;
-tic;
-for uu = 1:length(nn)
-
-    % ICA with fixed random variable
-    rng(uu);
-    ica_iters{uu} = fastICA(pcs_all, optN);
-
-end
-end_time=toc;
-
-end
-
+function [ica_iters, end_time] = ICA_iters(pcs_all, optN, numIters)
+%UNTITLED2 Summary of this function goes here
+%   Detailed explanation goes here
+
+
+nn = 1:numIters;
+tic;
+for uu = 1:length(nn)
+    fprintf("Iteration: %d\n", uu);
+    % ICA with fixed random variable
+    rng(uu, 'twister');
+    ica_iters{uu} =  fastica(pcs_all, 'numOfIC', optN, 'g', 'pow3', 'approach', 'symm');
+    if any(uu == [0:500:numIters])
+        save(['Results_iter_',num2str(uu),'.mat'],'ica_iters');
+    end
+end_time=toc;
+
+end
+
diff --git a/src/getGlobalModules.m b/src/getGlobalModules.m
@@ -1,37 +1,37 @@
-function [module_uniqRxns, singleModuleRxnIDs, lenRotComp, singleModuleRxnInds, graphTab, attTab] = getGlobalModules(modelEP, model, rotatedComp, singlesIDs, ubiquitousMets, graphFileName, attFileName)
-
-% filter singletons from all PCs
-singlePCs_PC = rotatedComp(:,singlesIDs);
-
-% for each PC, get module
-[singleModuleRxnIDs, lenRotComp, singleModuleRxnInds] = getModule(modelEP, singlePCs_PC);
-
-% get unique reactions from the modules (pass the inds, not rxn IDs)
-module_uniqRxns = unique(cell2mat(singleModuleRxnInds));
-inds = find(contains(model.rxns, modelEP.rxns(module_uniqRxns)));
-
-% extract submodel of module rxns
-% subModel = efmSubmodelExtractionAsSBML_raven(iAdipo_ex, inds);
-% metaboliteList = subModel.mets(find(ismember(subModel.metNames, ubiquitousMets)));
-% subModel_woUb = removeMetabolites(subModel, metaboliteList, 0);
-
-subModel = efmSubmodelExtractionAsSBML_raven(modelEP, module_uniqRxns);
-% subModel2 = efmSubmodelExtractionAsSBML_raven(modelEP, module_uniqRxns, true, ubiquitousMets);
-
-% remove reactions that contain only ubiquitous metabolites
-subModel_woUb = removeMets(subModel, ubiquitousMets, true, true);
-
-% extract rxns graph and attirbutes file for rxn nws
-% [file1, file2] = createRxnNw(subModel_woUb, modelEP, singleModuleRxnInds, singlesIDs);
-
-% create graph file
-graphTab = createGraphFile(subModel_woUb);
-
-% create attribute file
-attTab = createAttribFile(modelEP, subModel_woUb, singleModuleRxnInds, singlesIDs);
-
-% writetable(graphTab, graphFileName, 'Delimiter', ';');
-% writetable(attTab, attFileName, 'Delimiter', ';');
-
-end
-
+function [module_uniqRxns, singleModuleRxnIDs, lenRotComp, singleModuleRxnInds, graphTab, attTab] = getGlobalModules(modelEP, model, rotatedComp, singlesIDs, ubiquitousMets, graphFileName, attFileName)
+
+% filter singletons from all PCs
+singlePCs_PC = rotatedComp(:,singlesIDs);
+
+% for each PC, get module
+[singleModuleRxnIDs, lenRotComp, singleModuleRxnInds] = getModule(modelEP, singlePCs_PC);
+
+% get unique reactions from the modules (pass the inds, not rxn IDs)
+module_uniqRxns = unique(cell2mat(singleModuleRxnInds));
+inds = find(contains(model.rxns, modelEP.rxns(module_uniqRxns)));
+
+% extract submodel of module rxns
+% subModel = efmSubmodelExtractionAsSBML_raven(iAdipo_ex, inds);
+% metaboliteList = subModel.mets(find(ismember(subModel.metNames, ubiquitousMets)));
+% subModel_woUb = removeMetabolites(subModel, metaboliteList, 0);
+
+subModel = efmSubmodelExtractionAsSBML_raven(modelEP, module_uniqRxns);
+% subModel2 = efmSubmodelExtractionAsSBML_raven(modelEP, module_uniqRxns, true, ubiquitousMets);
+
+% remove reactions that contain only ubiquitous metabolites
+subModel_woUb = removeMets(subModel, ubiquitousMets, true, true);
+
+% extract rxns graph and attirbutes file for rxn nws
+% [file1, file2] = createRxnNw(subModel_woUb, modelEP, singleModuleRxnInds, singlesIDs);
+
+% create graph file
+graphTab = createGraphFile(subModel_woUb);
+
+% create attribute file
+attTab = createAttribFile(modelEP, subModel_woUb, singleModuleRxnInds, singlesIDs);
+
+writetable(graphTab, graphFileName, 'Delimiter', ';');
+writetable(attTab, attFileName, 'Delimiter', ';');
+
+end
+
diff --git a/src/getPCs.m b/src/getPCs.m
@@ -1,76 +1,72 @@
-function [rotatedComp, numRot_perVar, sortedVariance, cols] = getPCs(covMatrix, var)
-% This function takes a covariance matrix, computes its principal components and rotates
-% the components that explain certain (ex. 99.9%) variation in the sampled space
-%
-% USAGE
-%   rotatedComp = getPCs(covMatrix, var);
-%
-% INPUT
-%   covMatrix:   covariance matrix resulting from EP MetabolicEP() 
-%   var:         value for extracting components that explain 
-%                certain percent of the variation in the flux space. ex: 99.9
-%
-% OUTPUT
-%	rotatedComp:    matrix containing the rotated principal components
-%
-% OPTIONAL OUTPUT
-%
-%   cols:           total number of columns explaining 'var' percent vatriation
-%   numRot_perVar:  number of variables explaining (cumulative) variation
-%   sortedVariance: principal component loading matrix sorted by variance explained
-%
-% EXAMPLE:
-%   rotatedComp = getPCs(covMatrix, 99.9);
-%
-% ..Author: 
-%   Chaitra Sarathy, 31 Aug 2020 (initial commit of ComMet)
-
-
-% mean centring the sampled data points
-% for ii=1:size(data,1)
-%     meanCentred(ii,:) = data(ii,:)-mean(data(ii,:));
-% end
-
-
-% basis rotation (PCA) with cols=variables(rxns), rows=sampled datapoints
-% [coeff,newdata,latend,tsd,variance] = pca(meanCentred');
-% "coeff" are the principal component vectors. These are the eigenvectors of the covariance matrix. 
-tic
-[coeff,D] = eig(covMatrix);
-
-% calculate variance explained by normalizing eigen values
-variance = D / sum(D(:));
-% aa=sort(diag(D),'descend');
-
-% sort the variances
-eigVals = diag(variance)*100;
-sortedVariance = sort(eigVals,'descend');
-
-% select the number of variables explaining 'var'% variation
-numRot = length(find(cumsum(sortedVariance)<=var));
-cols = cell2mat(arrayfun(@(x) find(eigVals == x), sortedVariance(1:numRot),'uni', 0)) ;
-
-
-% numRot = length(find(cumsum(variance)<=95));
-% Code for generating plot of variance vs numPCs
-numRot_perVar=[];
-num = 0:0.1:99.9;
-for tt=1:length(num)
-    numRot_perVar(tt) = length(find(cumsum(sortedVariance)<=num(tt)));
-end
-plot(numRot_perVar,num, '*')
-% prepare the coefficients for rotation, select the components explaining
-% 99.9% variation and then rotate
-% EROOR FIX: 'svd infinity' error was due to zeros in the coeff matrix, so remove zeros
-[a,~] = find(coeff(:,cols)==0);
-forRotation = coeff(:,cols);
-forRotation(unique(a),:)=[]; 
-% varimax rotation
-% ERROR FIX: Passing 'Maxit', 1500 fixes the Error: Iteration limit exceeded for factor rotation.
-rotatedComp = rotatefactors(forRotation, 'Maxit', 1000, 'Normalize', 'off');
-
-toc
-tPCA_Rot=toc;
-
-end
-
+function [rotatedComp, numRot_perVar, sortedVariance, cols, rem] = getPCs(covMatrix, var)
+% This function takes a covariance matrix, computes its principal components and rotates
+% the components that explain certain (ex. 99.9%) variation in the sampled space
+%
+% USAGE
+%   rotatedComp = getPCs(covMatrix, var);
+%
+% INPUT
+%   covMatrix:   covariance matrix resulting from EP MetabolicEP() 
+%   var:         value for extracting components that explain 
+%                certain percent of the variation in the flux space. ex: 99.9
+%
+% OUTPUT
+%	rotatedComp:    matrix containing the rotated principal components
+%
+% OPTIONAL OUTPUT
+%
+%   cols:           total number of columns explaining 'var' percent vatriation
+%   numRot_perVar:  number of variables explaining (cumulative) variation
+%   sortedVariance: principal component loading matrix sorted by variance explained
+%
+% EXAMPLE:
+%   rotatedComp = getPCs(covMatrix, 99.9);
+%
+% ..Author: 
+%   Chaitra Sarathy, 31 Aug 2020 (initial commit of ComMet)
+
+
+% mean centring the sampled data points
+% for ii=1:size(data,1)
+%     meanCentred(ii,:) = data(ii,:)-mean(data(ii,:));
+% end
+
+
+% basis rotation (PCA) with cols=variables(rxns), rows=sampled datapoints
+% [coeff,newdata,latend,tsd,variance] = pca(meanCentred');
+% "coeff" are the principal component vectors. These are the eigenvectors of the covariance matrix. 
+tic
+[coeff,D] = eig(covMatrix);
+
+% calculate variance explained by normalizing eigen values
+variance = D / sum(D(:));
+% aa=sort(diag(D),'descend');
+
+% sort the variances
+eigVals = diag(variance)*100;
+sortedVariance = sort(eigVals,'descend');
+
+% select the number of variables explaining 'var'% variation
+numRot = length(find(cumsum(sortedVariance)<=var));
+cols = cell2mat(arrayfun(@(x) find(eigVals == x), sortedVariance(1:numRot),'uni', 0)) ;
+
+% Code for generating plot of variance vs numPCs
+numRot_perVar=[];
+num = 0:0.1:99.9;
+for tt=1:length(num)
+    numRot_perVar(tt) = length(find(cumsum(sortedVariance)<=num(tt)));
+end
+
+[rem,~] = find(coeff(:,cols)==0);
+forRotation = coeff(:,cols);
+forRotation(unique(rem),:)=[]; 
+% varimax rotation
+rotatedComp = rotatefactors(forRotation, 'Maxit', 1000, 'Normalize', 'off');
+
+toc
+tPCA_Rot=toc;
+
+rotatedComp = insertrows(rotatedComp,zeros(1,size(rotatedComp,2)),unique(rem));
+
+end
+