Skip to content

Commit

Permalink
some useful codes
Browse files Browse the repository at this point in the history
by DengCai
  • Loading branch information
lgbwust committed Jul 13, 2015
1 parent 5435473 commit 807c6ed
Show file tree
Hide file tree
Showing 86 changed files with 13,481 additions and 0 deletions.
71 changes: 71 additions & 0 deletions CSRKDApredict.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
function [accuracy,predictlabel,elapse] = CSRKDApredict(fea, gnd, model)
% CSRKDApredict: Spectral Regression Kernel Discriminant Analysis Prediction
%                (landmark-based variant)
%     CSRKDApredict uses a model produced by CSRKDAtrain as a classifier.
%     Test points are mapped through the kernel against the model's
%     landmarks, projected into the learned subspace, and assigned the
%     label of the nearest class center in that subspace.
%
%     [accuracy,predictlabel,elapse] = CSRKDApredict(fea, gnd, model);
%
%             Input:
%
%               fea     - data matrix. Each row is a data point.
%               gnd     - Label vector of fea (row or column).
%               model   - model trained by CSRKDAtrain.m. The fields read
%                         here are Landmark, projection, options,
%                         ClassCenter and ClassLabel.
%
%             Output:
%
%               accuracy     - classification accuracy (fraction of points
%                              in fea whose predicted label equals gnd)
%               predictlabel - predicted label for each row of fea
%               elapse       - running time of this call in seconds
%                              (wall clock, measured with tic/toc)
%
%    Examples:
%
%
% See also CSRKDAtrain, KSR, KSR_caller
%
%Reference:
%
%   [1] Deng Cai, Xiaofei He, and Jiawei Han. "Speed Up Kernel Discriminant
%   Analysis", The VLDB Journal, vol. 20, no. 1, pp. 21-33, January, 2011.
%
%   [2] Deng Cai, Xiaofei He and Jiawei Han, "SRDA: An Efficient Algorithm for
%   Large Scale Discriminant Analysis" IEEE Transactions on Knowledge and
%   Data Engineering, vol. 20, no. 1, pp. 1-12, January, 2008.
%
%   [3] V. Sindhwani, P. Niyogi, M. Belkin, "Beyond the Point Cloud: from
%   Transductive to Semi-supervised Learning", ICML 2005.
%
%   version 2.0 --December/2011
%   version 1.0 --May/2006
%
%   Written by Deng Cai (dengcai AT gmail.com)
%

MAX_MATRIX_SIZE = 8000; % You can change this number based on your memory.

% Start the timer so that the declared output 'elapse' is always assigned.
tStart = tic;

nTrain = size(model.Landmark,1);
nTest = size(fea,1);

% Number of test rows per block, chosen so that one kernel block
% (nBlock x nTrain) holds at most MAX_MATRIX_SIZE^2 entries.
nBlock = ceil(MAX_MATRIX_SIZE*MAX_MATRIX_SIZE/nTrain);

Embed_Test = zeros(nTest,size(model.projection,2));
for i = 1:ceil(nTest/nBlock)
    % The last block may be shorter than nBlock.
    smpIdx = (i-1)*nBlock+1 : min(i*nBlock,nTest);
    KTest = constructKernel(fea(smpIdx,:),model.Landmark,model.options);
    Embed_Test(smpIdx,:) = KTest*model.projection;
    clear KTest;
end

% Nearest class center in the embedded space.
D = EuDist2(Embed_Test,model.ClassCenter,0);
[dump, idx] = min(D,[],2);
predictlabel = model.ClassLabel(idx);

% Compare as column vectors so a row/column orientation mismatch between
% predictlabel and gnd can neither raise a dimension error nor be
% implicitly expanded into a matrix.
accuracy = 1 - nnz(predictlabel(:) ~= gnd(:))/nTest;

elapse = toc(tStart);





157 changes: 157 additions & 0 deletions CSRKDAtrain.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
function [model] = CSRKDAtrain(feaLabel, gnd, options, feaTrain, Landmark)
% CSRKDAtrain: Training Spectral Regression Kernel Discriminant Analysis
%              (landmark-based variant)
%
%       [model] = CSRKDAtrain(feaLabel, gnd)
%       [model] = CSRKDAtrain(feaLabel, gnd, options)
%       [model] = CSRKDAtrain(feaLabel, gnd, options, feaTrain)
%       [model] = CSRKDAtrain(feaLabel, gnd, options, feaTrain, Landmark)
%
%             Input:
%
%               feaLabel  - data matrix. Each row is a data point.
%               gnd       - Label vector of feaLabel.
%               feaTrain  - data matrix. This input is optional. If
%                           provided (and Landmark is not), the unlabeled
%                           rows of feaTrain are stacked under feaLabel
%                           when the landmarks are selected by k-means.
%               Landmark  - landmark matrix, one landmark per row. This
%                           input is optional. If provided it is used
%                           as-is and no k-means is run.
%               options   - Struct value in Matlab. It is forwarded to
%                           constructKernel and SR. The fields in options
%                           that can be set:
%
%                      KernelType  -  Choices are:
%                          'Gaussian'      - e^{-(|x-y|^2)/2t^2}
%                          'Polynomial'    - (x'*y)^d
%                          'PolyPlus'      - (x'*y+1)^d
%                          'Linear'        -  x'*y
%                          Default 'Gaussian'
%
%                          t       -  parameter for Gaussian
%                                     Default: mean pairwise distance
%                                     (EuDist2) over feaLabel, or over a
%                                     random subset of 3000 rows when
%                                     feaLabel has more than 3000 rows
%                          d       -  parameter for Poly
%
%                      ReguAlpha   -  regularization parameter for regression
%                                     Default 0.01
%
%                      BasisNum    -  number of landmarks selected by
%                                     k-means. Default 500
%                      MaxIter     -  maximum number of k-means iterations
%                                     Default 10
%
%        The following fields are listed by the original documentation
%        for the semi-supervised setting. This function does not read them
%        itself; they are only forwarded inside options.
%
%                      ReguBeta    -  Parameter for manifold regularizer
%                                     Default 1
%                      Fields for W -  Please see ConstructW.m for detailed options.
%
%                      LaplacianNorm = 0 | 1  (0 for un-normalized and 1 for
%                                              normalized graph laplacian)
%                                     Default: 0
%                      LaplacianDegree  - power of the graph Laplacian to use as
%                                         the graph regularizer
%                                     Default: 1
%
%
%             Output:
%               model   -  used for CSRKDApredict.m. Fields: options,
%                          ClassLabel, Landmark, projection, ClassCenter.
%
%             Errors:
%               Raises an error when landmarks have to be selected and
%               fewer than options.BasisNum data points are available.
%
%
%    Examples:
%
%
%
% See also KSR, KSR_caller
%
%Reference:
%
%   [1] Deng Cai, Xiaofei He, and Jiawei Han. "Speed Up Kernel Discriminant
%   Analysis", The VLDB Journal, vol. 20, no. 1, pp. 21-33, January, 2011.
%
%   [2] Deng Cai, Xiaofei He and Jiawei Han, "SRDA: An Efficient Algorithm for
%   Large Scale Discriminant Analysis" IEEE Transactions on Knowledge and
%   Data Engineering, vol. 20, no. 1, pp. 1-12, January, 2008.
%
%   [3] V. Sindhwani, P. Niyogi, M. Belkin, "Beyond the Point Cloud: from
%   Transductive to Semi-supervised Learning", ICML 2005.
%
%   version 2.0 --December/2011
%   version 1.0 --May/2006
%
%   Written by Deng Cai (dengcai AT gmail.com)
%
if ~exist('options','var')
    options = [];
end

% Default kernel. (The previous code assigned the string 'Gaussian' to
% options.ReguAlpha here, which left KernelType unset and also suppressed
% the numeric ReguAlpha default below.)
if ~isfield(options,'KernelType')
    options.KernelType = 'Gaussian';
end

if ~isfield(options,'t')
    % Default kernel width: mean pairwise distance, estimated on at most
    % 3000 randomly chosen labeled points to bound the cost.
    nSmp = size(feaLabel,1);
    idx = randperm(nSmp);
    if nSmp > 3000
        D = EuDist2(feaLabel(idx(1:3000),:));
    else
        D = EuDist2(feaLabel);
    end
    options.t = mean(mean(D));
end

options.ReguType = 'Ridge';
if ~isfield(options,'ReguAlpha')
    options.ReguAlpha = 0.01;
end

if ~isfield(options,'BasisNum')
    options.BasisNum = 500;
end

if ~isfield(options,'MaxIter')
    options.MaxIter = 10;
end

% Stored before RemoveMean is set below, so the model keeps only the
% options that CSRKDApredict passes to constructKernel.
model.options = options;

nSmp = size(feaLabel,1);

ClassLabel = unique(gnd);
model.ClassLabel = ClassLabel;
nClass = length(ClassLabel);

% Response Generation
% One random response row per class, replicated to every sample of that
% class. The first column is replaced by all ones, the columns are
% orthogonalized by QR, and the first (constant) column is then dropped.
rand('state',0); % fixed seed so the responses are reproducible
Y = rand(nClass,nClass);
Z = zeros(nSmp,nClass);
for i = 1:nClass
    idx = find(gnd==ClassLabel(i));
    Z(idx,:) = repmat(Y(i,:),length(idx),1);
end
Z(:,1) = ones(nSmp,1);
[Y,R] = qr(Z,0);
Y(:,1) = [];

% Landmark selection
if exist('Landmark','var')
    model.Landmark = Landmark;
else
    if exist('feaTrain','var')
        fea = [feaLabel;feaTrain];
    else
        fea = feaLabel;
    end
    if size(fea,1) < options.BasisNum
        error('The data is too small, use SRKDA directly!');
    end
    [dump, model.Landmark] = litekmeans(fea, options.BasisNum, 'MaxIter', options.MaxIter);
end

% Regress the responses on the kernel between labeled data and landmarks.
K = constructKernel(feaLabel,model.Landmark,options);
options.RemoveMean = 1;
model.projection = SR(options, Y, K);
Embed_Train = K*model.projection;

% Class centers in the embedded space, used by the nearest-center rule.
ClassCenter = zeros(nClass,size(Embed_Train,2));
for i = 1:nClass
    feaTmp = Embed_Train(gnd == ClassLabel(i),:);
    ClassCenter(i,:) = mean(feaTmp,1);
end
model.ClassCenter = ClassCenter;





126 changes: 126 additions & 0 deletions EMR.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
function [score, model] = EMR(data,y0,opts)
% EMR  Efficient Manifold Ranking.
%
%   [score, model] = EMR(data,y0,opts)
%
%   Input:
%       - data: nSmp x nFea data matrix, one sample per row
%       - y0:   initial query vector, e.g. 1 for the query item and 0 for
%               all the others
%
%       opts: option struct (optional)
%       - p: number of landmarks to pick (default 1000)
%       - r: number of nearest landmarks used to represent each sample
%            (default 5)
%       - a: weight in manifold ranking, score = (I - aS)^(-1)y
%            (default 0.99)
%       - mode: landmark selection method, one of
%           - 'kmeans': cluster centers returned by litekmeans (default)
%           - 'random': rows of data picked through a random permutation
%         The following options are read ONLY in mode 'kmeans'
%           - kmNumRep:  number of replicates for kmeans (default 1)
%           - kmMaxIter: maximum number of kmeans iterations (default 5)
%
%   Output:
%       - score: ranking score of every sample
%       - model: struct with fields landmarks, a, r and Z (the transposed
%                sample-to-landmark weight matrix) for out-of-sample
%                retrieval
%
%   An error is raised when opts.mode is neither 'kmeans' nor 'random'.
%
%   Usage:
%
%      See: http://www.zjucadcg.cn/dengcai/Data/ReproduceExp.html#EMR
%
%Reference:
%
%	 Bin Xu, Jiajun Bu, Chun Chen, Deng Cai, Xiaofei He, Wei Liu, Jiebo
%	 Luo, "Efficient Manifold Ranking for Image Retrieval",in Proceeding of
%	 the 34th International ACM SIGIR Conference on Research and
%	 Development in Information Retrieval (SIGIR), 2011, pp. 525-534.
%
%   version 2.0 --Feb./2012
%   version 1.0 --Sep./2010
%
%   Written by Bin Xu (binxu986 AT gmail.com)
%              Deng Cai (dengcai AT gmail.com)


% ---- Option parsing ------------------------------------------------
if nargin < 3
    opts = [];
end

if isfield(opts,'p')
    nLandmark = opts.p;
else
    nLandmark = 1000;
end

if isfield(opts,'r')
    nNeighbor = opts.r;
else
    nNeighbor = 5;
end

if isfield(opts,'a')
    rankWeight = opts.a;
else
    rankWeight = 0.99;
end

if isfield(opts,'mode')
    selMode = opts.mode;
else
    selMode = 'kmeans';
end

nSmp = size(data,1);

% ---- Landmark selection --------------------------------------------
if strcmp(selMode,'kmeans')
    if isfield(opts,'kmMaxIter')
        maxIter = opts.kmMaxIter;
    else
        maxIter = 5;
    end
    if isfield(opts,'kmNumRep')
        numRep = opts.kmNumRep;
    else
        numRep = 1;
    end
    [unusedLabel,landmarks] = litekmeans(data,nLandmark,'MaxIter',maxIter,'Replicates',numRep);
elseif strcmp(selMode,'random')
    shuffled = randperm(nSmp);
    landmarks = data(shuffled(1:nLandmark),:);
else
    error('mode does not support!');
end

model.landmarks = landmarks;
model.a = rankWeight;
model.r = nNeighbor;

% ---- Z construction ------------------------------------------------
% For every sample, find its nNeighbor closest landmarks by repeatedly
% taking the row-wise minimum and then masking the entry just selected.
dist = EuDist2(data,landmarks);
nearDist = zeros(nSmp,nNeighbor);
nearIdx = zeros(nSmp,nNeighbor);
rowIdx = (1:nSmp)';
for k = 1:nNeighbor
    [nearDist(:,k),nearIdx(:,k)] = min(dist,[],2);
    % Linear index of (row, selected column) in the nSmp-row matrix.
    picked = (nearIdx(:,k)-1)*nSmp + rowIdx;
    dist(picked) = 1e100;
end

% Scale each row by its largest kept distance (the last one selected),
% then apply the weight 0.75*(1-u^2); the farthest kept landmark therefore
% receives weight 0.
weights = bsxfun(@rdivide,nearDist,nearDist(:,nNeighbor));
weights = 0.75 * (1 - weights.^2);

rowSub = repmat(rowIdx,1,nNeighbor);
Z = sparse(rowSub(:),nearIdx(:),weights(:),nSmp,nLandmark);

model.Z = Z';

% ---- Efficient ranking ---------------------------------------------
% Row sums of Z*Z' computed without forming Z*Z', floored at 1e-12
% before taking the inverse square root.
colSum = full(sum(Z,1));
deg = Z*colSum';
deg = max(deg, 1e-12);
deg = deg.^(-.5);
H = spdiags(deg,0,nSmp,nSmp)*Z;

% Only an nLandmark x nLandmark system is solved; by the matrix inversion
% lemma the result equals (I - a*H*H')^(-1)*y0.
A = H'*H - (1/rankWeight)*speye(nLandmark);
rhs = H'*y0;
sol = A\rhs;
score = y0 - H*sol;



Loading

0 comments on commit 807c6ed

Please sign in to comment.