Skip to content

Commit

Permalink
Merge pull request #364 from UCL-CCS/iso_sparse_scalibility
Browse files Browse the repository at this point in the history
more scalable isotropic sparse grid
  • Loading branch information
djgroen authored Jun 27, 2022
2 parents 2d4b13e + e2b5fa4 commit fd42187
Show file tree
Hide file tree
Showing 18 changed files with 256 additions and 309 deletions.
8 changes: 4 additions & 4 deletions easyvvuq/actions/action_statuses.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class ActionPool:
An instance of `Actions` containing things to be done as part of the simulation.
inits: iterable
Initial inputs to be passed to each `Actions` representing a sample. Will usually contain
dictionaries with the following information: {'run_id': ..., 'campaign_dir': ...,
dictionaries with the following information: {'run_id': ..., 'campaign_dir': ...,
'run_info': ...}.
sequential: bool
Will run the actions sequentially.
Expand All @@ -70,7 +70,7 @@ def start(self, pool=None):
Returns
-------
ActionPool
Starts execution and returns a reference to itself for tracking progress
Starts execution and returns a reference to itself for tracking progress
and for collation.
"""
if pool is None:
Expand All @@ -92,7 +92,7 @@ def progress(self):
Returns
-------
dict
A dictionary with four keys - 'ready', 'active' and 'finished', 'failed'.
A dictionary with four keys - 'ready', 'active' and 'finished', 'failed'.
Values under "ready" correspond to `Actions` waiting for execution, "active"
corresponds to the number of currently running tasks.
"""
Expand All @@ -114,7 +114,7 @@ def progress(self):

def add_collate_callback(self, fn):
"""Adds a callback to be called after collation is done.
Parameters
----------
fn - A callable that takes previous as its only input.
Expand Down
6 changes: 3 additions & 3 deletions easyvvuq/actions/execute_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ def start(self, previous=None):
level3_dir = "runs_{}-{}/".format(level3_a, level3_b)
level4_dir = "runs_{}-{}/".format(level4_a, level4_b)
level5_dir = "run_{}".format(int(run_id))

if self.flatten:
path = os.path.join(self.root, previous['campaign_dir'], 'runs', level5_dir)
else:
path = os.path.join(self.root, previous['campaign_dir'], 'runs',
level1_dir, level2_dir, level3_dir, level4_dir, level5_dir)

Path(path).mkdir(parents=True, exist_ok=True)
previous['rundir'] = path
self.result = previous
Expand Down Expand Up @@ -254,7 +254,7 @@ def set_wrapper(self, wrapper):
Parameters
----------
wrapper: callable
A function to call on each Action. Should pass through the return of the
A function to call on each Action. Should pass through the return of the
start method.
"""
self.wrapper = wrapper
Expand Down
9 changes: 8 additions & 1 deletion easyvvuq/actions/execute_qcgpj.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,13 @@ class QCGPJPool(Executor):
polling_interval: int
An interval between queries to the QCG-PilotJob Manager service about state of the tasks, in seconds.
"""
def __init__(self, qcgpj_executor=None, template=None, template_params=None, polling_interval=1):

def __init__(
self,
qcgpj_executor=None,
template=None,
template_params=None,
polling_interval=1):
if qcgpj_executor is None:
qcgpj_executor = QCGPJExecutor()
if template is None:
Expand Down Expand Up @@ -268,6 +274,7 @@ class ExecuteQCGPJ:
action: Action
an action that will be decorated in order to enable parallel execution inside a QCG-PilotJob task.
"""

def __init__(self, action):
self._action = action

Expand Down
8 changes: 5 additions & 3 deletions easyvvuq/analysis/gp_analyse.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Will create a Gaussian Process surrogate of your model. For
"""Will create a Gaussian Process surrogate of your model. For
the sampler you can use the random sampler or the quasi-random
sampler. Don't forget to set the analysis class to GaussianProcessSurrogate
as is shown in the example below.
Expand All @@ -24,6 +24,7 @@
from .results import AnalysisResults
import numpy as np


class GaussianProcessSurrogateResults(AnalysisResults):
"""Gaussian process surrogate results class. You would never
create this manually in normal use. It is meant to be returned as the
Expand All @@ -38,6 +39,7 @@ class GaussianProcessSurrogateResults(AnalysisResults):
qoi: str
Output variable name.
"""

def __init__(self, gp, parameters, qoi):
self.gp = gp
self.parameters = parameters
Expand Down Expand Up @@ -97,8 +99,8 @@ def analyse(self, data_frame=None):
`GaussianProcessSurrogateResults` instance. Used to interact with the surrogate
model and to possibly access other functionality provided by the fitted model.
"""
x = data_frame[self.attr_cols].values #lgtm [py/hash-unhashable-value]
y = data_frame[self.target_cols].values #lgtm [py/hash-unhashable-value]
x = data_frame[self.attr_cols].values # lgtm [py/hash-unhashable-value]
y = data_frame[self.target_cols].values # lgtm [py/hash-unhashable-value]
gp = GaussianProcessRegressor(**self.kwargs)
gp = gp.fit(x, y)
return GaussianProcessSurrogateResults(gp, self.attr_cols, self.target_cols)
3 changes: 2 additions & 1 deletion easyvvuq/analysis/mcmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def plot_hist(self, input_parameter, chain=None, skip=0, merge=True):

def plot_chains(self, input_parameter, chain=None):
"""Will plot the chains with the input parameter value in the y axis.
Parameters
----------
input_parameter: str
Expand All @@ -74,6 +74,7 @@ class MCMCAnalysis(BaseAnalysisElement):
sampler: MCMCSampler
An instance of MCMCSampler used to generate MCMC samples.
"""

def __init__(self, sampler):
self.sampler = sampler

Expand Down
3 changes: 2 additions & 1 deletion easyvvuq/analysis/pce_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ def swap(x):
else:
return x[0]
values = np.array([inputs[key] for key in self.inputs])
results = dict([(qoi, swap((self.raw_data['fit'][qoi](*values)).T)) for qoi in self.qois])
results = dict([(qoi, swap((self.raw_data['fit'][qoi](*values)).T))
for qoi in self.qois])
return results
return surrogate_fn

Expand Down
120 changes: 70 additions & 50 deletions easyvvuq/analysis/sc_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def analyse(self, data_frame=None, compute_moments=True, compute_Sobols=True):
std_k = np.sqrt(var_k)
else:
pce_coefs = self.SC2PCE(self.samples[qoi_k])
mean_k, var_k = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
mean_k, var_k, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
std_k = np.sqrt(var_k)

# compute statistical moments
Expand Down Expand Up @@ -325,7 +325,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
name of the refinement error, default is 'surplus'. In this case the
error is based on the hierarchical surplus, which is an interpolation
based error. Another possibility is 'var',
in which case the error is based on the difference in the
in which case the error is based on the difference in the
variance between the current estimate and the estimate obtained
when a particular candidate direction is added.
"""
Expand All @@ -343,7 +343,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
self.wi_1d = self.sampler.wi_1d
self.pce_coefs = self.SC2PCE(samples, verbose=True, l_norm=all_idx,
xi_d=self.sampler.xi_d)
_, var_l = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)
_, var_l, _ = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)

# the currently accepted grid points
xi_d_accepted = self.sampler.generate_grid(self.l_norm)
Expand Down Expand Up @@ -378,7 +378,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
candidate_l_norm = np.concatenate((self.l_norm, l.reshape([1, self.N])))
# now we must recompute the combination coefficients
c_l = self.compute_comb_coef(l_norm=candidate_l_norm)
_, var_candidate_l = self.get_pce_stats(candidate_l_norm, self.pce_coefs, c_l)
_, var_candidate_l, _ = self.get_pce_stats(candidate_l_norm, self.pce_coefs, c_l)
#error in var
error[tuple(l)] = np.linalg.norm(var_candidate_l - var_l, np.inf)
else:
Expand Down Expand Up @@ -413,7 +413,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
# mean_f, var_f = self.get_moments(qoi)
logging.debug('Storing moments of iteration %d' % self.sampler.nadaptations)
pce_coefs = self.SC2PCE(samples, verbose=True)
mean_f, var_f = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
mean_f, var_f, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
self.mean_history.append(mean_f)
self.std_history.append(var_f)
logging.debug('done')
Expand Down Expand Up @@ -889,8 +889,8 @@ def SC2PCE(self, samples, verbose=True, **kwargs):

# orthogonal polynomial generated by chaospy
phi_k = [cp.expansion.stieltjes(k[n] - 1,
dist=self.sampler.params_distribution[n],
normed=True)[-1] for n in range(self.N)]
dist=self.sampler.params_distribution[n],
normed=True)[-1] for n in range(self.N)]

# the polynomial order of each integrand phi_k*a_j = (k - 1) + (number of
# colloc. points - 1)
Expand Down Expand Up @@ -950,8 +950,49 @@ def SC2PCE(self, samples, verbose=True, **kwargs):
logging.debug('done')
return pce_coefs

def generalized_pce_coefs(self, l_norm, pce_coefs, comb_coef):
    """
    Computes the generalized PCE coefficients, defined as the linear combination
    of PCE coefficients which make it possible to write the dimension-adaptive
    PCE expansion in standard form. See DOI: 10.13140/RG.2.2.18085.58083/1

    For every multi index l in l_norm the generalized coefficient is the sum of
    comb_coef[k] * pce_coefs[k][l] over all rows k of l_norm that satisfy
    k >= l in every dimension (the forward neighbours of l, including l itself).

    Parameters
    ----------
    l_norm : array
        array of quadrature order multi indices, one multi index per row
    pce_coefs : mapping
        PCE coefficients computed by SC2PCE subroutine, indexed as
        pce_coefs[tuple(k)][tuple(l)]
    comb_coef : mapping
        combination coefficients computed by compute_comb_coef, indexed as
        comb_coef[tuple(k)]

    Returns
    -------
    gen_pce_coefs : dict
        The generalized PCE coefficients, indexed per multi index (as a tuple).
    """
    assert self.sparse, "Generalized PCE coeffcients are computed only for sparse grids"

    # the generalized PCE coefs, which turn the adaptive PCE into a standard PCE expansion
    gen_pce_coefs = {}
    for l_idx in l_norm:
        key_l = tuple(l_idx)

        # the set of all forward neighbours of l: {k | k >= l}
        forward = l_norm[np.where((l_idx <= l_norm).all(axis=1))[0]]

        # comb_coef[k] * pce_coefs[k][l], summed over k for a fixed l; the
        # accumulator starts at 0.0 so the summation order matches row order
        coef = 0.0
        for k in forward:
            key_k = tuple(k)
            coef += comb_coef[key_k] * pce_coefs[key_k][key_l]
        gen_pce_coefs[key_l] = coef

    return gen_pce_coefs

def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
"""Compute the mean and the variance based on the PCE coefficients
"""Compute the mean and the variance based on the generalized PCE coefficients
See DOI: 10.13140/RG.2.2.18085.58083/1
Parameters
----------
Expand All @@ -967,30 +1008,28 @@ def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
tuple with the mean, the variance and the generalized PCE coefficients
"""

# Compute the PCE mean
k1 = tuple(np.ones(self.N, dtype=int))
mean = 0.0
for l in l_norm:
mean = mean + comb_coef[tuple(l)] * pce_coefs[tuple(l)][k1]
gen_pce_coefs = self.generalized_pce_coefs(l_norm, pce_coefs, comb_coef)

# with the generalized pce coefs, the standard PCE formulas for the mean and var
# can be used for the dimension-adaptive PCE

# the PCE mean is just the 1st generalized PCE coef
l1 = tuple(np.ones(self.N, dtype=int))
mean = gen_pce_coefs[l1]

# the variance is the sum of the squared generalized PCE coefs, excluding the 1st coef
D = 0.0
for k in l_norm[1:]:
var_k = 0.0
for l in l_norm[1:]:
if tuple(k) in pce_coefs[tuple(l)].keys():
eta_k = pce_coefs[tuple(l)][tuple(k)]
var_k = var_k + comb_coef[tuple(l)] * eta_k
var_k = var_k**2
D = D + var_k
for l in l_norm[1:]:
D += gen_pce_coefs[tuple(l)] ** 2

return mean, D
return mean, D, gen_pce_coefs

def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
"""Computes Sobol indices using Polynomials Chaos coefficients. These
coefficients are computed from the SC expansion via a transformation
of basis (SC2PCE subroutine). This works better than computing the
Sobol indices directly from the SC expansion in the case of the
dimension-adaptive sampler.
dimension-adaptive sampler. See DOI: 10.13140/RG.2.2.18085.58083/1
Method: J.D. Jakeman et al, "Adaptive multi-index collocation
for uncertainty quantification and sensitivity analysis", 2019.
Expand Down Expand Up @@ -1021,27 +1060,9 @@ def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
samples = self.samples[qoi]
N_qoi = self.N_qoi

# compute the PCE coefficients
# compute the (generalized) PCE coefficients and stats
self.pce_coefs = self.SC2PCE(samples)

# Compute the PCE mean (not really required)
k1 = tuple(np.ones(self.N, dtype=int))
mean = 0.0
for l in self.l_norm:
mean = mean + self.comb_coef[tuple(l)] * self.pce_coefs[tuple(l)][k1]

# dict to hold the variance per multi index k
var = {}
# D = total PCE variance
D = 0.0
for k in self.l_norm[1:]:
var_k = 0.0
for l in self.l_norm[1:]:
if tuple(k) in self.pce_coefs[tuple(l)].keys():
eta_k = self.pce_coefs[tuple(l)][tuple(k)]
var_k = var_k + self.comb_coef[tuple(l)] * eta_k
var[tuple(k)] = var_k**2
D = D + var[tuple(k)]
mean, D, gen_pce_coefs = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)

logging.debug('Computing Sobol indices...')
# Universe = (0, 1, ..., N - 1)
Expand Down Expand Up @@ -1091,7 +1112,7 @@ def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
logging.debug('Multi indices of dimension %s are %s' % (u, k))
# the partial variance of u is the sum of all variances indexed by k
for k_u in k:
D_u[u] = D_u[u] + var[tuple(k_u)]
D_u[u] = D_u[u] + gen_pce_coefs[tuple(k_u)] ** 2

# normalize D_u by total variance D to get the Sobol index
S_u[u] = D_u[u] / D
Expand Down Expand Up @@ -1284,13 +1305,12 @@ def get_uncertainty_amplification(self, qoi):
CV_out = np.mean(CV_out[idx])
blowup = CV_out / CV_in

logging.debug('-----------------')
logging.debug('Mean CV input = %.4f %%' % (100 * CV_in, ))
logging.debug('Mean CV output = %.4f %%' % (100 * CV_out, ))
logging.debug(
'Uncertainty amplification factor = %.4f/%.4f = %.4f' %
print('-----------------')
print('Mean CV input = %.4f %%' % (100 * CV_in, ))
print('Mean CV output = %.4f %%' % (100 * CV_out, ))
print('Uncertainty amplification factor = %.4f/%.4f = %.4f' %
(CV_out, CV_in, blowup))
logging.debug('-----------------')
print('-----------------')

return blowup

Expand Down
14 changes: 10 additions & 4 deletions easyvvuq/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,11 @@ def get_collation_result(self, last_iteration=False):
iteration = self._active_sampler.iteration - 1
else:
iteration = -1
return self.campaign_db.get_results(self._active_app['name'], self._active_sampler_id,
status=easyvvuq.constants.Status.COLLATED, iteration=iteration)
return self.campaign_db.get_results(
self._active_app['name'],
self._active_sampler_id,
status=easyvvuq.constants.Status.COLLATED,
iteration=iteration)

def get_invalid_runs(self, last_iteration=False):
"""Return dataframe containing all results marked as INVALID.
Expand All @@ -595,8 +598,11 @@ def get_invalid_runs(self, last_iteration=False):
iteration = self._active_sampler.iteration - 1
else:
iteration = -1
return self.campaign_db.get_results(self._active_app['name'], self._active_sampler_id,
status=easyvvuq.constants.Status.INVALID, iteration=iteration)
return self.campaign_db.get_results(
self._active_app['name'],
self._active_sampler_id,
status=easyvvuq.constants.Status.INVALID,
iteration=iteration)

def apply_analysis(self, analysis):
"""Run the `analysis` element on the output of the last run collation.
Expand Down
3 changes: 2 additions & 1 deletion easyvvuq/decoders/simple_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class SimpleCSV:
output_columns: list
A list of column names that will be selected to appear in the output.
"""

def __init__(self, target_filename, output_columns, dialect='excel'):
if len(output_columns) == 0:
msg = "output_columns cannot be empty."
Expand All @@ -56,7 +57,7 @@ def __init__(self, target_filename, output_columns, dialect='excel'):
def _get_output_path(run_info=None, outfile=None):
"""Constructs absolute path from the `target_filename` and the `run_dir` parameter
in the `run_info` retrieved from the database.
Parameters
----------
run_info: dict
Expand Down
Loading

0 comments on commit fd42187

Please sign in to comment.