-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathRPI_unknownQR_learning_continuous.m
76 lines (62 loc) · 2.03 KB
/
RPI_unknownQR_learning_continuous.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
% This scripts learns the near-optimal control policy using the collected
% data
clear;
clc;
global A B C Q R D F n2 n3 n4 n m K p;
% Read the collected data
load(['data_MIMO.mat']);
[n, m] = size(B);
q1 = length(D);
q2 = length(F);
[~,p] = size(C);
n2 = m+n+1;
n3 = n2*(n2+1)/2;
n4 = n3*(n3+1)/2;
Kest = {K}; % Store the obtained control gain in each iteration
Gest = {}; % Store the estimated G in each iteration
imax = 20; % the total number of learning iterations
i = 1;
% Check stability of the initial control policy
Mat_stab = stochastic_sys_mat(K);
if ~all(eig(Mat_stab)<0)
disp("Instability");
end
% Find optimal solution using the model-based policy iteration
Ktrue = {K};
Ptrue = {};
for i=1:imax
% Policy evaluation
Ptrue{end+1} = PI(Ktrue{i});
% Policy improvement
Ktrue{end+1} = (F{1}'*Ptrue{end}*F{1} + R)\B'*Ptrue{end};
end
Pstar = Ptrue{end};
Kstar = Ktrue{end};
% Learn the near-optimal control policy from data using Algo. 1
inv_Psi_Psi = Psi_Psi\eye(n3);
T_learning = 100;
for i=1:imax
% Check the stability of control gain Kest{i}
Mat_stab = stochastic_sys_mat(Kest{i});
if all(all(isnan(Mat_stab))) || ~all(eig(Mat_stab)<0)
disp("Instability");
instab = 1;
break;
end
% Solve the ODE on interval [0,T_learning]
theta0 = sm2vec(zeros(n2));
[tt,theta] = ode45(@(t,x) Learning_ODE(t,x,Kest{end},inv_Psi_Psi,I_Psi_xx,Psi_r),[0,T_learning],theta0);
% Extract Ghat, Gest and Kest from theta
THETA = vec2sm(theta(end,:),n2);
Ghat = THETA(1:m+n,1:m+n);
Gest{end+1} = Ghat;
Kest{end+1} = Ghat(n+1:end,n+1:end)\Ghat(n+1:end,1:n);
end
% Save the learned results into file
save(['results_using_data_MIMO.mat']);
function dx = Learning_ODE(t,x,K,data1,data2,data3)
global n2 m n;
X = vec2sm(x,n2);
dx = data1*(data2*vec(X(1:n,1:n)- X(1:n,n+1:m+n)*K - K'*X(1:n,n+1:m+n)'...
+K'*X(n+1:m+n,n+1:m+n)*K)+data3);
end