-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlearn_basis.m
90 lines (70 loc) · 2.24 KB
/
learn_basis.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
function B = learn_basis(X, S, l2norm, Binit)
% Learning basis using Lagrange dual (with basis normalization)
%
% This code solves the following problem:
%
% minimize_B 0.5*||X - B*S||^2
% subject to ||B(:,j)||_2 <= l2norm, forall j=1...size(S,1)
%
% The detail of the algorithm is described in the following paper:
% 'Efficient Sparse Codig Algorithms', Honglak Lee, Alexis Battle, Rajat Raina, Andrew Y. Ng,
% Advances in Neural Information Processing Systems (NIPS) 19, 2007
%
% Written by Honglak Lee <[email protected]>
% Copyright 2007 by Honglak Lee, Alexis Battle, Rajat Raina, and Andrew Y. Ng
L = size(X,1);
N = size(X,2);
M = size(S, 1);
% tic
SSt = S*S';
XSt = X*S';
if exist('Binit', 'var')
dual_lambda = diag(Binit\XSt - SSt);
else
dual_lambda = 10*abs(rand(M,1)); % any arbitrary initialization should be ok.
end
c = l2norm^2;
trXXt = sum(sum(X.^2));
lb=zeros(size(dual_lambda));
options = optimset('GradObj','on', 'Hessian','on','Display','off');
% options = optimset('GradObj','on', 'Hessian','on', 'TolFun', 1e-7);
[x, fval, exitflag, output] = fmincon(@(x) fobj_basis_dual(x, SSt, XSt, X, c, trXXt), dual_lambda, [], [], [], [], lb, [], [], options);
% output.iterations
fval_opt = -0.5*N*fval;
dual_lambda= x;
Bt = (SSt+diag(dual_lambda)) \ XSt';
B_dual= Bt';
fobjective_dual = fval_opt;
B= B_dual;
% fobjective = fobjective_dual;
% toc
return;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [f,g,H] = fobj_basis_dual(dual_lambda, SSt, XSt, X, c, trXXt)
% Compute the objective function value at x
L= size(XSt,1);
M= length(dual_lambda);
SSt_inv = inv(SSt + diag(dual_lambda));
% trXXt = sum(sum(X.^2));
if L>M
% (M*M)*((M*L)*(L*M)) => MLM + MMM = O(M^2(M+L))
f = -trace(SSt_inv*(XSt'*XSt))+trXXt-c*sum(dual_lambda);
else
% (L*M)*(M*M)*(M*L) => LMM + LML = O(LM(M+L))
f = -trace(XSt*SSt_inv*XSt')+trXXt-c*sum(dual_lambda);
end
f= -f;
if nargout > 1 % fun called with two output arguments
% Gradient of the function evaluated at x
g = zeros(M,1);
temp = XSt*SSt_inv;
g = sum(temp.^2) - c;
g= -g;
if nargout > 2
% Hessian evaluated at x
% H = -2.*((SSt_inv*XSt'*XSt*SSt_inv).*SSt_inv);
H = -2.*((temp'*temp).*SSt_inv);
H = -H;
end
end
return