Skip to content

Commit 807c6ed

Browse files
committed
some useful codes
by DengCai
1 parent 5435473 commit 807c6ed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

86 files changed

+13481
-0
lines changed

CSRKDApredict.m

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
function [accuracy,predictlabel,elapse] = CSRKDApredict(fea, gnd, model)
% CSRKDApredict: Spectral Regression Kernel Discriminant Analysis Prediction
%               CSRKDApredict uses SRKDA as a classifier. It uses the nearest
%               center rule in the SRKDA subspace for classification.
%
%       [accuracy,predictlabel,elapse] = CSRKDApredict(fea, gnd, model);
%
%             Input:
%
%               fea     - data matrix. Each row is a data point.
%               gnd     - Label vector of fea.
%             model     - model trained by CSRKDAtrain.m
%
%             Output:
%
%             accuracy     - classification accuracy
%             predictlabel - predicted label for fea
%             elapse       - running time (seconds).
%
%    Examples:
%
%
% See also SRKDAtrain, KSR, KSR_caller
%
%Reference:
%
%   [1] Deng Cai, Xiaofei He, and Jiawei Han. "Speed Up Kernel Discriminant
%   Analysis", The VLDB Journal, vol. 20, no. 1, pp. 21-33, January, 2011.
%
%   [2] Deng Cai, Xiaofei He and Jiawei Han, "SRDA: An Efficient Algorithm for
%   Large Scale Discriminant Analysis" IEEE Transactions on Knowledge and
%   Data Engineering, vol. 20, no. 1, pp. 1-12, January, 2008.
%
%   [3] V. Sindhwani, P. Niyogi, M. Belkin, "Beyond the Point Cloud: from
%   Transductive to Semi-supervised Learning", ICML 2005.
%
%   version 2.0 --December/2011
%   version 1.0 --May/2006
%
%   Written by Deng Cai (dengcai AT gmail.com)
%

tic;  % start timing so the 'elapse' output is actually populated

MAX_MATRIX_SIZE = 8000; % You can change this number based on your memory.

nTrain = size(model.Landmark,1);
nTest = size(fea,1);
% Process the test set in blocks so each kernel sub-matrix holds at most
% about MAX_MATRIX_SIZE^2 entries.
nBlock = ceil(MAX_MATRIX_SIZE*MAX_MATRIX_SIZE/nTrain);
Embed_Test = zeros(nTest,size(model.projection,2));
for i = 1:ceil(nTest/nBlock)
    if i == ceil(nTest/nBlock)
        smpIdx = (i-1)*nBlock+1:nTest;
    else
        smpIdx = (i-1)*nBlock+1:i*nBlock;
    end
    KTest = constructKernel(fea(smpIdx,:),model.Landmark,model.options);
    Embed_Test(smpIdx,:) = KTest*model.projection;
    clear KTest;
end

% Nearest-class-center rule in the embedded subspace.
D = EuDist2(Embed_Test,model.ClassCenter,0);
[dump, idx] = min(D,[],2);
predictlabel = model.ClassLabel(idx);

accuracy = 1 - length(find(predictlabel-gnd))/nTest;

elapse = toc;

CSRKDAtrain.m

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
function [model] = CSRKDAtrain(feaLabel, gnd, options, feaTrain, Landmark)
% CSRKDAtrain: Training Spectral Regression Kernel Discriminant Analysis
%
%       [model] = CSRKDAtrain(feaLabel, gnd)
%       [model] = CSRKDAtrain(feaLabel, gnd, options)
%       [model] = CSRKDAtrain(feaLabel, gnd, options, feaTrain)
%
%             Input:
%
%          feaLabel     - data matrix. Each row is a data point.
%               gnd     - Label vector of feaLabel.
%          feaTrain     - data matrix. This input is optional. If provided,
%                         SRKDA will be performed in a semi-supervised way.
%                         feaTrain will be the training data without label.
%           options     - Struct value in Matlab. The fields in options
%                         that can be set:
%
%                     KernelType  -  Choices are:
%                         'Gaussian'   - e^{-(|x-y|^2)/2t^2}
%                         'Polynomial' - (x'*y)^d
%                         'PolyPlus'   - (x'*y+1)^d
%                         'Linear'     -  x'*y
%
%                              t  -  parameter for Gaussian
%                              d  -  parameter for Poly
%
%                      ReguAlpha  -  regularization paramter for regression
%                                    Default 0.01
%
%            The following fields are only useful when feaTrain is provided.
%
%                       ReguBeta  -  Paramter for manifold regularizer
%                                    Default 1
%              Fields for W       -  Please see ConstructW.m for detailed options.
%
%                  LaplacianNorm = 0 | 1  (0 for un-normalized and 1 for
%                                          normalized graph laplacian)
%                                          Default: 0
%                LaplacianDegree -  power of the graph Laplacian to use as
%                                   the graph regularizer
%                                   Default: 1
%
%
%
%
%             Output:
%               model - used for SRKDApredict.m
%
%
%    Examples:
%
%
%
% See also KSR, KSR_caller
%
%Reference:
%
%   [1] Deng Cai, Xiaofei He, and Jiawei Han. "Speed Up Kernel Discriminant
%   Analysis", The VLDB Journal, vol. 20, no. 1, pp. 21-33, January, 2011.
%
%   [2] Deng Cai, Xiaofei He and Jiawei Han, "SRDA: An Efficient Algorithm for
%   Large Scale Discriminant Analysis" IEEE Transactions on Knowledge and
%   Data Engineering, vol. 20, no. 1, pp. 1-12, January, 2008.
%
%   [3] V. Sindhwani, P. Niyogi, M. Belkin, "Beyond the Point Cloud: from
%   Transductive to Semi-supervised Learning", ICML 2005.
%
%   version 2.0 --December/2011
%   version 1.0 --May/2006
%
%   Written by Deng Cai (dengcai AT gmail.com)
%
if ~exist('options','var')
    options = [];
end

if ~isfield(options,'KernelType')
    % BUGFIX: this previously assigned options.ReguAlpha = 'Gaussian',
    % clobbering the regularization parameter and leaving KernelType unset.
    options.KernelType = 'Gaussian';
end

if ~isfield(options,'t')
    % Default Gaussian bandwidth: mean pairwise distance over (a sample of)
    % the labeled data.
    nSmp = size(feaLabel,1);
    idx = randperm(nSmp);
    if nSmp > 3000
        D = EuDist2(feaLabel(idx(1:3000),:));
    else
        D = EuDist2(feaLabel);
    end
    options.t = mean(mean(D));
end

options.ReguType = 'Ridge';
if ~isfield(options,'ReguAlpha')
    options.ReguAlpha = 0.01;
end

if ~isfield(options,'BasisNum')
    options.BasisNum = 500;
end

if ~isfield(options,'MaxIter')
    options.MaxIter = 10;
end

model.options = options;

nSmp = size(feaLabel,1);

ClassLabel = unique(gnd);
model.ClassLabel = ClassLabel;
nClass = length(ClassLabel);

% Response Generation: build class-indicator responses, then orthogonalize.
rand('state',0);
Y = rand(nClass,nClass);
Z = zeros(nSmp,nClass);
for i = 1:nClass
    idx = find(gnd==ClassLabel(i));
    Z(idx,:) = repmat(Y(i,:),length(idx),1);
end
Z(:,1) = ones(nSmp,1);
[Y,R] = qr(Z,0);
Y(:,1) = [];  % drop the constant direction

% Landmark selection: reuse provided landmarks, otherwise pick them with
% k-means over the available (labeled + unlabeled) data.
if exist('Landmark','var')
    model.Landmark = Landmark;
else
    if exist('feaTrain','var')
        fea = [feaLabel;feaTrain];
        if size(fea,1) < options.BasisNum
            error('The data is too small, use SRKDA directly!');
        end
        [dump, model.Landmark] = litekmeans(fea, options.BasisNum, 'MaxIter', options.MaxIter);
    else
        if size(feaLabel,1) < options.BasisNum
            error('The data is too small, use SRKDA directly!');
        end
        [dump, model.Landmark] = litekmeans(feaLabel, options.BasisNum, 'MaxIter', options.MaxIter);
    end
end

% Kernel regression of the responses gives the projection.
K = constructKernel(feaLabel,model.Landmark,options);
options.RemoveMean = 1;
model.projection = SR(options, Y, K);
Embed_Train = K*model.projection;

% Per-class centers in the embedded space (used by the nearest-center rule
% at prediction time).
ClassCenter = zeros(nClass,size(Embed_Train,2));
for i = 1:nClass
    feaTmp = Embed_Train(gnd == ClassLabel(i),:);
    ClassCenter(i,:) = mean(feaTmp,1);
end
model.ClassCenter = ClassCenter;

EMR.m

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
function [score, model] = EMR(data,y0,opts)
% [score, model] = EMR(data,y0,opts): Efficient Manifold Ranking
% Input:
%   - data: the data matrix of size nSmp x nFea, where each row is a sample
%           point
%   - y0:   the initial query vector, e.g., query item = 1 and the others all 0
%
%   opts: options for this algorithm
%     - p:    the number of landmarks picked (default 1000)
%     - r:    the number of nearest landmarks for representation (default 5)
%     - a:    weight in manifold ranking, score = (I - aS)^(-1)y, default 0.99
%     - mode: landmark selection method, currently support
%             - 'kmeans': use centers of clusters generated by kmeans (default)
%             - 'random': use randomly sampled points from the original
%                         data set
%       The following parameters are effective ONLY in mode 'kmeans':
%     - kmNumRep:  the number of replicates for initial kmeans (default 1)
%     - kmMaxIter: the maximum number of iterations for initial kmeans
%                  (default 5)
%
% Output:
%   - score: the ranking scores for each point
%   - model: the learned model for out-of-sample retrieval
%
% Usage:
%
% See: http://www.zjucadcg.cn/dengcai/Data/ReproduceExp.html#EMR
%
%Reference:
%
%   Bin Xu, Jiajun Bu, Chun Chen, Deng Cai, Xiaofei He, Wei Liu, Jiebo
%   Luo, "Efficient Manifold Ranking for Image Retrieval",in Proceeding of
%   the 34th International ACM SIGIR Conference on Research and
%   Development in Information Retrieval (SIGIR), 2011, pp. 525-534.
%
%   version 2.0 --Feb./2012
%   version 1.0 --Sep./2010
%
%   Written by Bin Xu (binxu986 AT gmail.com)
%              Deng Cai (dengcai AT gmail.com)

% ---- Parameter parsing: defaults, overridden by fields present in opts ----
if (~exist('opts','var'))
    opts = [];
end

p = 1000;
if isfield(opts,'p')
    p = opts.p;
end

r = 5;
if isfield(opts,'r')
    r = opts.r;
end

a = 0.99;
if isfield(opts,'a')
    a = opts.a;
end

mode = 'kmeans';
if isfield(opts,'mode')
    mode = opts.mode;
end

numSamples = size(data,1);

% ---- Landmark selection ----
if strcmp(mode,'kmeans')
    kmMaxIter = 5;
    if isfield(opts,'kmMaxIter')
        kmMaxIter = opts.kmMaxIter;
    end
    kmNumRep = 1;
    if isfield(opts,'kmNumRep')
        kmNumRep = opts.kmNumRep;
    end
    [dump,landmarks] = litekmeans(data,p,'MaxIter',kmMaxIter,'Replicates',kmNumRep);
    clear kmMaxIter kmNumRep
elseif strcmp(mode,'random')
    indSmp = randperm(numSamples);
    landmarks = data(indSmp(1:p),:);
    clear indSmp
else
    error('mode does not support!');
end

model.landmarks = landmarks;
model.a = a;
model.r = r;

% ---- Z construction: sparse representation of each sample over its r
%      nearest landmarks ----
distToLm = EuDist2(data,landmarks);
nnDist = zeros(numSamples,r);
nnIdx = zeros(numSamples,r);
for k = 1:r
    [nnDist(:,k),nnIdx(:,k)] = min(distToLm,[],2);
    % Mask the landmark just taken so the next pass finds the next nearest.
    masked = (nnIdx(:,k)-1)*numSamples + (1:numSamples)';
    distToLm(masked) = 1e100;
end
% Quadratic weights 0.75*(1-u^2), with distances scaled by the r-th
% nearest-landmark distance of each sample.
nnDist = bsxfun(@rdivide,nnDist,nnDist(:,r));
weights = 0.75 * (1 - nnDist.^2);
rowIdx = repmat((1:numSamples)',1,r);
Z = sparse(rowIdx(:),nnIdx(:),weights(:),numSamples,p);

model.Z = Z';

% ---- Efficient Ranking: solve in the p-dimensional landmark space instead
%      of the full nSmp x nSmp system ----
colSum = full(sum(Z,1));
deg = Z*colSum';
deg = max(deg, 1e-12);      % guard against zero degrees before the inverse sqrt
invSqrtDeg = deg.^(-.5);
H = spdiags(invSqrtDeg,0,numSamples,numSamples)*Z;

lhs = H'*H - (1/a)*speye(p);
rhs = H'*y0;
score = y0 - H*(lhs\rhs);

0 commit comments

Comments
 (0)