Skip to content

Commit fd42187

Browse files
authored
Merge pull request #364 from UCL-CCS/iso_sparse_scalibility
more scalable isotropic sparse grid
2 parents 2d4b13e + e2b5fa4 commit fd42187

18 files changed

+256
-309
lines changed

easyvvuq/actions/action_statuses.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class ActionPool:
4545
An instance of `Actions` containing things to be done as part of the simulation.
4646
inits: iterable
4747
Initial inputs to be passed to each `Actions` representing a sample. Will usually contain
48-
dictionaries with the following information: {'run_id': ..., 'campaign_dir': ...,
48+
dictionaries with the following information: {'run_id': ..., 'campaign_dir': ...,
4949
'run_info': ...}.
5050
sequential: bool
5151
Will run the actions sequentially.
@@ -70,7 +70,7 @@ def start(self, pool=None):
7070
Returns
7171
-------
7272
ActionPool
73-
Starts execution and returns a reference to itself for tracking progress
73+
Starts execution and returns a reference to itself for tracking progress
7474
and for collation.
7575
"""
7676
if pool is None:
@@ -92,7 +92,7 @@ def progress(self):
9292
Returns
9393
-------
9494
dict
95-
A dictionary with four keys - 'ready', 'active' and 'finished', 'failed'.
95+
A dictionary with four keys - 'ready', 'active' and 'finished', 'failed'.
9696
Values under "ready" correspond to `Actions` waiting for execution, "active"
9797
corresponds to the number of currently running tasks.
9898
"""
@@ -114,7 +114,7 @@ def progress(self):
114114

115115
def add_collate_callback(self, fn):
116116
"""Adds a callback to be called after collation is done.
117-
117+
118118
Parameters
119119
----------
120120
fn - A callable that takes previous as its only input.

easyvvuq/actions/execute_local.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,13 @@ def start(self, previous=None):
8181
level3_dir = "runs_{}-{}/".format(level3_a, level3_b)
8282
level4_dir = "runs_{}-{}/".format(level4_a, level4_b)
8383
level5_dir = "run_{}".format(int(run_id))
84-
84+
8585
if self.flatten:
8686
path = os.path.join(self.root, previous['campaign_dir'], 'runs', level5_dir)
8787
else:
8888
path = os.path.join(self.root, previous['campaign_dir'], 'runs',
8989
level1_dir, level2_dir, level3_dir, level4_dir, level5_dir)
90-
90+
9191
Path(path).mkdir(parents=True, exist_ok=True)
9292
previous['rundir'] = path
9393
self.result = previous
@@ -254,7 +254,7 @@ def set_wrapper(self, wrapper):
254254
Parameters
255255
----------
256256
wrapper: callable
257-
A function to call on each Action. Should pass through the return of the
257+
A function to call on each Action. Should pass through the return of the
258258
start method.
259259
"""
260260
self.wrapper = wrapper

easyvvuq/actions/execute_qcgpj.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,13 @@ class QCGPJPool(Executor):
122122
polling_interval: int
123123
An interval between queries to the QCG-PilotJob Manager service about state of the tasks, in seconds.
124124
"""
125-
def __init__(self, qcgpj_executor=None, template=None, template_params=None, polling_interval=1):
125+
126+
def __init__(
127+
self,
128+
qcgpj_executor=None,
129+
template=None,
130+
template_params=None,
131+
polling_interval=1):
126132
if qcgpj_executor is None:
127133
qcgpj_executor = QCGPJExecutor()
128134
if template is None:
@@ -268,6 +274,7 @@ class ExecuteQCGPJ:
268274
action: Action
269275
an action that will be decorated in order to enable parallel execution inside a QCG-PilotJob task.
270276
"""
277+
271278
def __init__(self, action):
272279
self._action = action
273280

easyvvuq/analysis/gp_analyse.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Will create a Gaussian Process surrogate of your model. For
1+
"""Will create a Gaussian Process surrogate of your model. For
22
the sampler you can use the random sampler or the quasi-random
33
sampler. Don't forget to set the analysis class to GaussianProcessSurrogate
44
as is shown in the example below.
@@ -24,6 +24,7 @@
2424
from .results import AnalysisResults
2525
import numpy as np
2626

27+
2728
class GaussianProcessSurrogateResults(AnalysisResults):
2829
"""Gaussian process surrogate results class. You would never
2930
create this manually in normal use. It is meant to be returned as the
@@ -38,6 +39,7 @@ class GaussianProcessSurrogateResults(AnalysisResults):
3839
qoi: str
3940
Output variable name.
4041
"""
42+
4143
def __init__(self, gp, parameters, qoi):
4244
self.gp = gp
4345
self.parameters = parameters
@@ -97,8 +99,8 @@ def analyse(self, data_frame=None):
9799
`GaussianProcessSurrogateResults` instance. Used to interact with the surrogate
98100
model and to possibly access other functionality provided by the fitted model.
99101
"""
100-
x = data_frame[self.attr_cols].values #lgtm [py/hash-unhashable-value]
101-
y = data_frame[self.target_cols].values #lgtm [py/hash-unhashable-value]
102+
x = data_frame[self.attr_cols].values # lgtm [py/hash-unhashable-value]
103+
y = data_frame[self.target_cols].values # lgtm [py/hash-unhashable-value]
102104
gp = GaussianProcessRegressor(**self.kwargs)
103105
gp = gp.fit(x, y)
104106
return GaussianProcessSurrogateResults(gp, self.attr_cols, self.target_cols)

easyvvuq/analysis/mcmc.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def plot_hist(self, input_parameter, chain=None, skip=0, merge=True):
5050

5151
def plot_chains(self, input_parameter, chain=None):
5252
"""Will plot the chains with the input parameter value in the y axis.
53-
53+
5454
Parameters
5555
----------
5656
input_parameter: str
@@ -74,6 +74,7 @@ class MCMCAnalysis(BaseAnalysisElement):
7474
sampler: MCMCSampler
7575
An instance of MCMCSampler used to generate MCMC samples.
7676
"""
77+
7778
def __init__(self, sampler):
7879
self.sampler = sampler
7980

easyvvuq/analysis/pce_analysis.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ def swap(x):
138138
else:
139139
return x[0]
140140
values = np.array([inputs[key] for key in self.inputs])
141-
results = dict([(qoi, swap((self.raw_data['fit'][qoi](*values)).T)) for qoi in self.qois])
141+
results = dict([(qoi, swap((self.raw_data['fit'][qoi](*values)).T))
142+
for qoi in self.qois])
142143
return results
143144
return surrogate_fn
144145

easyvvuq/analysis/sc_analysis.py

+70-50
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def analyse(self, data_frame=None, compute_moments=True, compute_Sobols=True):
246246
std_k = np.sqrt(var_k)
247247
else:
248248
pce_coefs = self.SC2PCE(self.samples[qoi_k])
249-
mean_k, var_k = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
249+
mean_k, var_k, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
250250
std_k = np.sqrt(var_k)
251251

252252
# compute statistical moments
@@ -325,7 +325,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
325325
name of the refinement error, default is 'surplus'. In this case the
326326
error is based on the hierarchical surplus, which is an interpolation
327327
based error. Another possibility is 'var',
328-
in which case the error is based on the difference in the
328+
in which case the error is based on the difference in the
329329
variance between the current estimate and the estimate obtained
330330
when a particular candidate direction is added.
331331
"""
@@ -343,7 +343,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
343343
self.wi_1d = self.sampler.wi_1d
344344
self.pce_coefs = self.SC2PCE(samples, verbose=True, l_norm=all_idx,
345345
xi_d=self.sampler.xi_d)
346-
_, var_l = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)
346+
_, var_l, _ = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)
347347

348348
# the currently accepted grid points
349349
xi_d_accepted = self.sampler.generate_grid(self.l_norm)
@@ -378,7 +378,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
378378
candidate_l_norm = np.concatenate((self.l_norm, l.reshape([1, self.N])))
379379
# now we must recompute the combination coefficients
380380
c_l = self.compute_comb_coef(l_norm=candidate_l_norm)
381-
_, var_candidate_l = self.get_pce_stats(candidate_l_norm, self.pce_coefs, c_l)
381+
_, var_candidate_l, _ = self.get_pce_stats(candidate_l_norm, self.pce_coefs, c_l)
382382
#error in var
383383
error[tuple(l)] = np.linalg.norm(var_candidate_l - var_l, np.inf)
384384
else:
@@ -413,7 +413,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
413413
# mean_f, var_f = self.get_moments(qoi)
414414
logging.debug('Storing moments of iteration %d' % self.sampler.nadaptations)
415415
pce_coefs = self.SC2PCE(samples, verbose=True)
416-
mean_f, var_f = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
416+
mean_f, var_f, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
417417
self.mean_history.append(mean_f)
418418
self.std_history.append(var_f)
419419
logging.debug('done')
@@ -889,8 +889,8 @@ def SC2PCE(self, samples, verbose=True, **kwargs):
889889

890890
# orthogonal polynomial generated by chaospy
891891
phi_k = [cp.expansion.stieltjes(k[n] - 1,
892-
dist=self.sampler.params_distribution[n],
893-
normed=True)[-1] for n in range(self.N)]
892+
dist=self.sampler.params_distribution[n],
893+
normed=True)[-1] for n in range(self.N)]
894894

895895
# the polynomial order of each integrand phi_k*a_j = (k - 1) + (number of
896896
# colloc. points - 1)
@@ -950,8 +950,49 @@ def SC2PCE(self, samples, verbose=True, **kwargs):
950950
logging.debug('done')
951951
return pce_coefs
952952

953+
def generalized_pce_coefs(self, l_norm, pce_coefs, comb_coef):
954+
"""
955+
Computes the generalized PCE coefficients, defined as the linear combination
956+
of PCE coefficients which make it possible to write the dimension-adaptive
957+
PCE expansion in standard form. See DOI: 10.13140/RG.2.2.18085.58083/1
958+
959+
Parameters
960+
----------
961+
l_norm : array
962+
array of quadrature order multi indices
963+
pce_coefs : tuple
964+
tuple of PCE coefficients computed by SC2PCE subroutine
965+
comb_coef : tuple
966+
tuple of combination coefficients computed by compute_comb_coef
967+
968+
Returns
969+
-------
970+
gen_pce_coefs : tuple
971+
The generalized PCE coefficients, indexed per multi index.
972+
973+
"""
974+
assert self.sparse, "Generalized PCE coeffcients are computed only for sparse grids"
975+
976+
# the set of all forward neighbours of l: {k | k >= l}
977+
F_l = {}
978+
# the generalized PCE coefs, which turn the adaptive PCE into a standard PCE expansion
979+
gen_pce_coefs = {}
980+
for l in l_norm:
981+
# {indices of k | k >= l}
982+
idx = np.where((l <= l_norm).all(axis=1))[0]
983+
F_l[tuple(l)] = l_norm[idx]
984+
985+
# the generalized PCE coefs are comb_coef[k] * pce_coefs[k][l], summed over k
986+
# for a fixed l
987+
gen_pce_coefs[tuple(l)] = 0.0
988+
for k in F_l[tuple(l)]:
989+
gen_pce_coefs[tuple(l)] += comb_coef[tuple(k)] * pce_coefs[tuple(k)][tuple(l)]
990+
991+
return gen_pce_coefs
992+
953993
def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
954-
"""Compute the mean and the variance based on the PCE coefficients
994+
"""Compute the mean and the variance based on the generalized PCE coefficients
995+
See DOI: 10.13140/RG.2.2.18085.58083/1
955996
956997
Parameters
957998
----------
@@ -967,30 +1008,28 @@ def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
9671008
tuple with the mean, the variance and the generalized PCE coefficients
9681009
"""
9691010

970-
# Compute the PCE mean
971-
k1 = tuple(np.ones(self.N, dtype=int))
972-
mean = 0.0
973-
for l in l_norm:
974-
mean = mean + comb_coef[tuple(l)] * pce_coefs[tuple(l)][k1]
1011+
gen_pce_coefs = self.generalized_pce_coefs(l_norm, pce_coefs, comb_coef)
9751012

1013+
# with the generalized pce coefs, the standard PCE formulas for the mean and var
1014+
# can be used for the dimension-adaptive PCE
1015+
1016+
# the PCE mean is just the 1st generalized PCE coef
1017+
l1 = tuple(np.ones(self.N, dtype=int))
1018+
mean = gen_pce_coefs[l1]
1019+
1020+
# the variance is the sum of the squared generalized PCE coefs, excluding the 1st coef
9761021
D = 0.0
977-
for k in l_norm[1:]:
978-
var_k = 0.0
979-
for l in l_norm[1:]:
980-
if tuple(k) in pce_coefs[tuple(l)].keys():
981-
eta_k = pce_coefs[tuple(l)][tuple(k)]
982-
var_k = var_k + comb_coef[tuple(l)] * eta_k
983-
var_k = var_k**2
984-
D = D + var_k
1022+
for l in l_norm[1:]:
1023+
D += gen_pce_coefs[tuple(l)] ** 2
9851024

986-
return mean, D
1025+
return mean, D, gen_pce_coefs
9871026

9881027
def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
9891028
"""Computes Sobol indices using Polynomials Chaos coefficients. These
9901029
coefficients are computed from the SC expansion via a transformation
9911030
of basis (SC2PCE subroutine). This works better than computing the
9921031
Sobol indices directly from the SC expansion in the case of the
993-
dimension-adaptive sampler.
1032+
dimension-adaptive sampler. See DOI: 10.13140/RG.2.2.18085.58083/1
9941033
9951034
Method: J.D. Jakeman et al, "Adaptive multi-index collocation
9961035
for uncertainty quantification and sensitivity analysis", 2019.
@@ -1021,27 +1060,9 @@ def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
10211060
samples = self.samples[qoi]
10221061
N_qoi = self.N_qoi
10231062

1024-
# compute the PCE coefficients
1063+
# compute the (generalized) PCE coefficients and stats
10251064
self.pce_coefs = self.SC2PCE(samples)
1026-
1027-
# Compute the PCE mean (not really required)
1028-
k1 = tuple(np.ones(self.N, dtype=int))
1029-
mean = 0.0
1030-
for l in self.l_norm:
1031-
mean = mean + self.comb_coef[tuple(l)] * self.pce_coefs[tuple(l)][k1]
1032-
1033-
# dict to hold the variance per multi index k
1034-
var = {}
1035-
# D = total PCE variance
1036-
D = 0.0
1037-
for k in self.l_norm[1:]:
1038-
var_k = 0.0
1039-
for l in self.l_norm[1:]:
1040-
if tuple(k) in self.pce_coefs[tuple(l)].keys():
1041-
eta_k = self.pce_coefs[tuple(l)][tuple(k)]
1042-
var_k = var_k + self.comb_coef[tuple(l)] * eta_k
1043-
var[tuple(k)] = var_k**2
1044-
D = D + var[tuple(k)]
1065+
mean, D, gen_pce_coefs = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)
10451066

10461067
logging.debug('Computing Sobol indices...')
10471068
# Universe = (0, 1, ..., N - 1)
@@ -1091,7 +1112,7 @@ def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
10911112
logging.debug('Multi indices of dimension %s are %s' % (u, k))
10921113
# the partial variance of u is the sum of all variances indexed by k
10931114
for k_u in k:
1094-
D_u[u] = D_u[u] + var[tuple(k_u)]
1115+
D_u[u] = D_u[u] + gen_pce_coefs[tuple(k_u)] ** 2
10951116

10961117
# normalize D_u by total variance D to get the Sobol index
10971118
S_u[u] = D_u[u] / D
@@ -1284,13 +1305,12 @@ def get_uncertainty_amplification(self, qoi):
12841305
CV_out = np.mean(CV_out[idx])
12851306
blowup = CV_out / CV_in
12861307

1287-
logging.debug('-----------------')
1288-
logging.debug('Mean CV input = %.4f %%' % (100 * CV_in, ))
1289-
logging.debug('Mean CV output = %.4f %%' % (100 * CV_out, ))
1290-
logging.debug(
1291-
'Uncertainty amplification factor = %.4f/%.4f = %.4f' %
1308+
print('-----------------')
1309+
print('Mean CV input = %.4f %%' % (100 * CV_in, ))
1310+
print('Mean CV output = %.4f %%' % (100 * CV_out, ))
1311+
print('Uncertainty amplification factor = %.4f/%.4f = %.4f' %
12921312
(CV_out, CV_in, blowup))
1293-
logging.debug('-----------------')
1313+
print('-----------------')
12941314

12951315
return blowup
12961316

easyvvuq/campaign.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -573,8 +573,11 @@ def get_collation_result(self, last_iteration=False):
573573
iteration = self._active_sampler.iteration - 1
574574
else:
575575
iteration = -1
576-
return self.campaign_db.get_results(self._active_app['name'], self._active_sampler_id,
577-
status=easyvvuq.constants.Status.COLLATED, iteration=iteration)
576+
return self.campaign_db.get_results(
577+
self._active_app['name'],
578+
self._active_sampler_id,
579+
status=easyvvuq.constants.Status.COLLATED,
580+
iteration=iteration)
578581

579582
def get_invalid_runs(self, last_iteration=False):
580583
"""Return dataframe containing all results marked as INVALID.
@@ -595,8 +598,11 @@ def get_invalid_runs(self, last_iteration=False):
595598
iteration = self._active_sampler.iteration - 1
596599
else:
597600
iteration = -1
598-
return self.campaign_db.get_results(self._active_app['name'], self._active_sampler_id,
599-
status=easyvvuq.constants.Status.INVALID, iteration=iteration)
601+
return self.campaign_db.get_results(
602+
self._active_app['name'],
603+
self._active_sampler_id,
604+
status=easyvvuq.constants.Status.INVALID,
605+
iteration=iteration)
600606

601607
def apply_analysis(self, analysis):
602608
"""Run the `analysis` element on the output of the last run collation.

easyvvuq/decoders/simple_csv.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class SimpleCSV:
4242
output_columns: list
4343
A list of column names that will be selected to appear in the output.
4444
"""
45+
4546
def __init__(self, target_filename, output_columns, dialect='excel'):
4647
if len(output_columns) == 0:
4748
msg = "output_columns cannot be empty."
@@ -56,7 +57,7 @@ def __init__(self, target_filename, output_columns, dialect='excel'):
5657
def _get_output_path(run_info=None, outfile=None):
5758
"""Constructs absolute path from the `target_filename` and the `run_dir` parameter
5859
in the `run_info` retrieved from the database.
59-
60+
6061
Parameters
6162
----------
6263
run_info: dict

0 commit comments

Comments
 (0)