Merge pull request #364 from UCL-CCS/iso_sparse_scalibility

more scalable isotropic sparse grid
UCL-CCS · Jun 27, 2022 · fd42187 · fd42187
2 parents 2d4b13e + e2b5fa4
commit fd42187
Show file tree

Hide file tree

Showing 18 changed files with 256 additions and 309 deletions.
diff --git a/easyvvuq/actions/action_statuses.py b/easyvvuq/actions/action_statuses.py
@@ -45,7 +45,7 @@ class ActionPool:
         An instance of `Actions` containing things to be done as part of the simulation.
     inits: iterable
         Initial inputs to be passed to each `Actions` representing a sample. Will usually contain
-        dictionaries with the following information: {'run_id': ..., 'campaign_dir': ..., 
+        dictionaries with the following information: {'run_id': ..., 'campaign_dir': ...,
         'run_info': ...}.
     sequential: bool
         Will run the actions sequentially.
@@ -70,7 +70,7 @@ def start(self, pool=None):
         Returns
         -------
         ActionPool
-            Starts execution and returns a reference to itself for tracking progress 
+            Starts execution and returns a reference to itself for tracking progress
             and for collation.
         """
         if pool is None:
@@ -92,7 +92,7 @@ def progress(self):
         Returns
         -------
         dict
-            A dictionary with four keys - 'ready', 'active' and 'finished', 'failed'. 
+            A dictionary with four keys - 'ready', 'active' and 'finished', 'failed'.
             Values under "ready" correspond to `Actions` waiting for execution, "active"
             corresponds to the number of currently running tasks.
         """
@@ -114,7 +114,7 @@ def progress(self):
 
     def add_collate_callback(self, fn):
         """Adds a callback to be called after collation is done.
-        
+
         Parameters
         ----------
         fn - A callable that takes previous as it's only input.

diff --git a/easyvvuq/actions/execute_local.py b/easyvvuq/actions/execute_local.py
@@ -81,13 +81,13 @@ def start(self, previous=None):
         level3_dir = "runs_{}-{}/".format(level3_a, level3_b)
         level4_dir = "runs_{}-{}/".format(level4_a, level4_b)
         level5_dir = "run_{}".format(int(run_id))
-        
+
         if self.flatten:
             path = os.path.join(self.root, previous['campaign_dir'], 'runs', level5_dir)
         else:
             path = os.path.join(self.root, previous['campaign_dir'], 'runs',
                                 level1_dir, level2_dir, level3_dir, level4_dir, level5_dir)
-      
+
         Path(path).mkdir(parents=True, exist_ok=True)
         previous['rundir'] = path
         self.result = previous
@@ -254,7 +254,7 @@ def set_wrapper(self, wrapper):
         Parameters
         ----------
         wrapper: callable
-            A function to call on each Action. Should pass through the return of the 
+            A function to call on each Action. Should pass through the return of the
             start method.
         """
         self.wrapper = wrapper

diff --git a/easyvvuq/actions/execute_qcgpj.py b/easyvvuq/actions/execute_qcgpj.py
@@ -122,7 +122,13 @@ class QCGPJPool(Executor):
     polling_interval: int
         An interval between queries to the QCG-PilotJob Manager service about state of the tasks, in seconds.
     """
-    def __init__(self, qcgpj_executor=None, template=None, template_params=None, polling_interval=1):
+
+    def __init__(
+            self,
+            qcgpj_executor=None,
+            template=None,
+            template_params=None,
+            polling_interval=1):
         if qcgpj_executor is None:
             qcgpj_executor = QCGPJExecutor()
         if template is None:
@@ -268,6 +274,7 @@ class ExecuteQCGPJ:
     action: Action
         an action that will be decorated in order to enable parallel execution inside a QCG-PilotJob task.
     """
+
     def __init__(self, action):
         self._action = action
 

diff --git a/easyvvuq/analysis/gp_analyse.py b/easyvvuq/analysis/gp_analyse.py
@@ -1,4 +1,4 @@
-"""Will create a Gaussian Process surrogate of your model. For 
+"""Will create a Gaussian Process surrogate of your model. For
 the sampler you can use the random sampler or the quasi-random
 sampler. Don't forget to set the analysis class to GaussianProcessSurrogate
 as is shown in the example below.
@@ -24,6 +24,7 @@
 from .results import AnalysisResults
 import numpy as np
 
+
 class GaussianProcessSurrogateResults(AnalysisResults):
     """Gaussian process surrogate results class. You would never
     create this manually in normal use. It is meant to be returned as the
@@ -38,6 +39,7 @@ class GaussianProcessSurrogateResults(AnalysisResults):
     qoi: str
         Output variable name.
     """
+
     def __init__(self, gp, parameters, qoi):
         self.gp = gp
         self.parameters = parameters
@@ -97,8 +99,8 @@ def analyse(self, data_frame=None):
            `GaussianProcessSurrogateResults` instance. Used to interact with the surrogate
            model and to possibly access other functionality provided by the fitted model.
         """
-        x = data_frame[self.attr_cols].values #lgtm [py/hash-unhashable-value]
-        y = data_frame[self.target_cols].values #lgtm [py/hash-unhashable-value]
+        x = data_frame[self.attr_cols].values  # lgtm [py/hash-unhashable-value]
+        y = data_frame[self.target_cols].values  # lgtm [py/hash-unhashable-value]
         gp = GaussianProcessRegressor(**self.kwargs)
         gp = gp.fit(x, y)
         return GaussianProcessSurrogateResults(gp, self.attr_cols, self.target_cols)
diff --git a/easyvvuq/analysis/mcmc.py b/easyvvuq/analysis/mcmc.py
@@ -50,7 +50,7 @@ def plot_hist(self, input_parameter, chain=None, skip=0, merge=True):
 
     def plot_chains(self, input_parameter, chain=None):
         """Will plot the chains with the input parameter value in the y axis.
-        
+
         Parameters
         ----------
         input_parameter: str
@@ -74,6 +74,7 @@ class MCMCAnalysis(BaseAnalysisElement):
     sampler: MCMCSampler
        An instance of MCMCSampler used to generate MCMC samples.
     """
+
     def __init__(self, sampler):
         self.sampler = sampler
 

diff --git a/easyvvuq/analysis/pce_analysis.py b/easyvvuq/analysis/pce_analysis.py
@@ -138,7 +138,8 @@ def swap(x):
                 else:
                     return x[0]
             values = np.array([inputs[key] for key in self.inputs])
-            results = dict([(qoi, swap((self.raw_data['fit'][qoi](*values)).T)) for qoi in self.qois])
+            results = dict([(qoi, swap((self.raw_data['fit'][qoi](*values)).T))
+                           for qoi in self.qois])
             return results
         return surrogate_fn
 

diff --git a/easyvvuq/analysis/sc_analysis.py b/easyvvuq/analysis/sc_analysis.py
@@ -246,7 +246,7 @@ def analyse(self, data_frame=None, compute_moments=True, compute_Sobols=True):
                     std_k = np.sqrt(var_k)
                 else:
                     pce_coefs = self.SC2PCE(self.samples[qoi_k])
-                    mean_k, var_k = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
+                    mean_k, var_k, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
                     std_k = np.sqrt(var_k)
 
                 # compute statistical moments
@@ -325,7 +325,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
             name of the refinement error, default is 'surplus'. In this case the
             error is based on the hierarchical surplus, which is an interpolation
             based error. Another possibility is 'var',
-            in which case the error is based on the difference in the 
+            in which case the error is based on the difference in the
             variance between the current estimate and the estimate obtained
             when a particular candidate direction is added.
         """
@@ -343,7 +343,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
             self.wi_1d = self.sampler.wi_1d
             self.pce_coefs = self.SC2PCE(samples, verbose=True, l_norm=all_idx,
                                          xi_d=self.sampler.xi_d)
-            _, var_l = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)
+            _, var_l, _ = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)
 
         # the currently accepted grid points
         xi_d_accepted = self.sampler.generate_grid(self.l_norm)
@@ -378,7 +378,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
                 candidate_l_norm = np.concatenate((self.l_norm, l.reshape([1, self.N])))
                 # now we must recompute the combination coefficients
                 c_l = self.compute_comb_coef(l_norm=candidate_l_norm)
-                _, var_candidate_l = self.get_pce_stats(candidate_l_norm, self.pce_coefs, c_l)
+                _, var_candidate_l, _ = self.get_pce_stats(candidate_l_norm, self.pce_coefs, c_l)
                 #error in var
                 error[tuple(l)] = np.linalg.norm(var_candidate_l - var_l, np.inf)
             else:
@@ -413,7 +413,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True,
             # mean_f, var_f = self.get_moments(qoi)
             logging.debug('Storing moments of iteration %d' % self.sampler.nadaptations)
             pce_coefs = self.SC2PCE(samples, verbose=True)
-            mean_f, var_f = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
+            mean_f, var_f, _ = self.get_pce_stats(self.l_norm, pce_coefs, self.comb_coef)
             self.mean_history.append(mean_f)
             self.std_history.append(var_f)
             logging.debug('done')
@@ -889,8 +889,8 @@ def SC2PCE(self, samples, verbose=True, **kwargs):
 
                     # orthogonal polynomial generated by chaospy
                     phi_k = [cp.expansion.stieltjes(k[n] - 1,
-                                         dist=self.sampler.params_distribution[n],
-                                         normed=True)[-1] for n in range(self.N)]
+                                                    dist=self.sampler.params_distribution[n],
+                                                    normed=True)[-1] for n in range(self.N)]
 
                     # the polynomial order of each integrand phi_k*a_j = (k - 1) + (number of
                     # colloc. points - 1)
@@ -950,8 +950,49 @@ def SC2PCE(self, samples, verbose=True, **kwargs):
         logging.debug('done')
         return pce_coefs
 
+    def generalized_pce_coefs(self, l_norm, pce_coefs, comb_coef):
+        """
+        Computes the generalized PCE coefficients, defined as the linear combination
+        of PCE coefficients which make it possible to write the dimension-adaptive
+        PCE expansion in standard form. See DOI: 10.13140/RG.2.2.18085.58083/1
+
+        Parameters
+        ----------
+        l_norm : array
+            array of quadrature order multi indices
+        pce_coefs : tuple
+            tuple of PCE coefficients computed by SC2PCE subroutine
+        comb_coef : tuple
+            tuple of combination coefficients computed by compute_comb_coef
+
+        Returns
+        -------
+        gen_pce_coefs : dict
+            The generalized PCE coefficients, keyed by multi-index tuple.
+
+        """
+        assert self.sparse, "Generalized PCE coeffcients are computed only for sparse grids"
+
+        # the set of all forward neighbours of l: {k | k >= l}
+        F_l = {}
+        # the generalized PCE coefs, which turn the adaptive PCE into a standard PCE expansion
+        gen_pce_coefs = {}
+        for l in l_norm:
+            # {indices of k | k >= l}
+            idx = np.where((l <= l_norm).all(axis=1))[0]
+            F_l[tuple(l)] = l_norm[idx]
+
+            # the generalized PCE coefs are comb_coef[k] * pce_coefs[k][l], summed over k
+            # for a fixed l
+            gen_pce_coefs[tuple(l)] = 0.0
+            for k in F_l[tuple(l)]:
+                gen_pce_coefs[tuple(l)] += comb_coef[tuple(k)] * pce_coefs[tuple(k)][tuple(l)]
+
+        return gen_pce_coefs
+
     def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
-        """Compute the mean and the variance based on the PCE coefficients
+        """Compute the mean and the variance based on the generalized PCE coefficients
+        See DOI: 10.13140/RG.2.2.18085.58083/1
 
         Parameters
         ----------
@@ -967,30 +1008,28 @@ def get_pce_stats(self, l_norm, pce_coefs, comb_coef):
         tuple with mean and variance based on the PCE coefficients
         """
 
-        # Compute the PCE mean
-        k1 = tuple(np.ones(self.N, dtype=int))
-        mean = 0.0
-        for l in l_norm:
-            mean = mean + comb_coef[tuple(l)] * pce_coefs[tuple(l)][k1]
+        gen_pce_coefs = self.generalized_pce_coefs(l_norm, pce_coefs, comb_coef)
 
+        # with the generalized pce coefs, the standard PCE formulas for the mean and var
+        # can be used for the dimension-adaptive PCE
+
+        # the PCE mean is just the 1st generalized PCE coef
+        l1 = tuple(np.ones(self.N, dtype=int))
+        mean = gen_pce_coefs[l1]
+
+        # the variance is the sum of the squared generalized PCE coefs, excluding the 1st coef
         D = 0.0
-        for k in l_norm[1:]:
-            var_k = 0.0
-            for l in l_norm[1:]:
-                if tuple(k) in pce_coefs[tuple(l)].keys():
-                    eta_k = pce_coefs[tuple(l)][tuple(k)]
-                    var_k = var_k + comb_coef[tuple(l)] * eta_k
-            var_k = var_k**2
-            D = D + var_k
+        for l in l_norm[1:]:
+            D += gen_pce_coefs[tuple(l)] ** 2
 
-        return mean, D
+        return mean, D, gen_pce_coefs
 
     def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
         """Computes Sobol indices using Polynomials Chaos coefficients. These
         coefficients are computed from the SC expansion via a transformation
         of basis (SC2PCE subroutine). This works better than computing the
         Sobol indices directly from the SC expansion in the case of the
-        dimension-adaptive sampler.
+        dimension-adaptive sampler. See DOI: 10.13140/RG.2.2.18085.58083/1
 
         Method: J.D. Jakeman et al, "Adaptive multi-index collocation
         for uncertainty quantification and sensitivity analysis", 2019.
@@ -1021,27 +1060,9 @@ def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
             samples = self.samples[qoi]
             N_qoi = self.N_qoi
 
-        # compute the PCE coefficients
+        # compute the (generalized) PCE coefficients and stats
         self.pce_coefs = self.SC2PCE(samples)
-
-        # Compute the PCE mean (not really required)
-        k1 = tuple(np.ones(self.N, dtype=int))
-        mean = 0.0
-        for l in self.l_norm:
-            mean = mean + self.comb_coef[tuple(l)] * self.pce_coefs[tuple(l)][k1]
-
-        # dict to hold the variance per multi index k
-        var = {}
-        # D = total PCE variance
-        D = 0.0
-        for k in self.l_norm[1:]:
-            var_k = 0.0
-            for l in self.l_norm[1:]:
-                if tuple(k) in self.pce_coefs[tuple(l)].keys():
-                    eta_k = self.pce_coefs[tuple(l)][tuple(k)]
-                    var_k = var_k + self.comb_coef[tuple(l)] * eta_k
-            var[tuple(k)] = var_k**2
-            D = D + var[tuple(k)]
+        mean, D, gen_pce_coefs = self.get_pce_stats(self.l_norm, self.pce_coefs, self.comb_coef)
 
         logging.debug('Computing Sobol indices...')
         # Universe = (0, 1, ..., N - 1)
@@ -1091,7 +1112,7 @@ def get_pce_sobol_indices(self, qoi, typ='first_order', **kwargs):
             logging.debug('Multi indices of dimension  %s are %s' % (u, k))
             # the partial variance of u is the sum of all variances index by k
             for k_u in k:
-                D_u[u] = D_u[u] + var[tuple(k_u)]
+                D_u[u] = D_u[u] + gen_pce_coefs[tuple(k_u)] ** 2
 
             # normalize D_u by total variance D to get the Sobol index
             S_u[u] = D_u[u] / D
@@ -1284,13 +1305,12 @@ def get_uncertainty_amplification(self, qoi):
         CV_out = np.mean(CV_out[idx])
         blowup = CV_out / CV_in
 
-        logging.debug('-----------------')
-        logging.debug('Mean CV input = %.4f %%' % (100 * CV_in, ))
-        logging.debug('Mean CV output = %.4f %%' % (100 * CV_out, ))
-        logging.debug(
-            'Uncertainty amplification factor = %.4f/%.4f = %.4f' %
+        print('-----------------')
+        print('Mean CV input = %.4f %%' % (100 * CV_in, ))
+        print('Mean CV output = %.4f %%' % (100 * CV_out, ))
+        print('Uncertainty amplification factor = %.4f/%.4f = %.4f' %
             (CV_out, CV_in, blowup))
-        logging.debug('-----------------')
+        print('-----------------')
 
         return blowup
 

diff --git a/easyvvuq/campaign.py b/easyvvuq/campaign.py
@@ -573,8 +573,11 @@ def get_collation_result(self, last_iteration=False):
             iteration = self._active_sampler.iteration - 1
         else:
             iteration = -1
-        return self.campaign_db.get_results(self._active_app['name'], self._active_sampler_id,
-                                            status=easyvvuq.constants.Status.COLLATED, iteration=iteration)
+        return self.campaign_db.get_results(
+            self._active_app['name'],
+            self._active_sampler_id,
+            status=easyvvuq.constants.Status.COLLATED,
+            iteration=iteration)
 
     def get_invalid_runs(self, last_iteration=False):
         """Return dataframe containing all results marked as INVALID.
@@ -595,8 +598,11 @@ def get_invalid_runs(self, last_iteration=False):
             iteration = self._active_sampler.iteration - 1
         else:
             iteration = -1
-        return self.campaign_db.get_results(self._active_app['name'], self._active_sampler_id,
-                                            status=easyvvuq.constants.Status.INVALID, iteration=iteration)
+        return self.campaign_db.get_results(
+            self._active_app['name'],
+            self._active_sampler_id,
+            status=easyvvuq.constants.Status.INVALID,
+            iteration=iteration)
 
     def apply_analysis(self, analysis):
         """Run the `analysis` element on the output of the last run collation.

diff --git a/easyvvuq/decoders/simple_csv.py b/easyvvuq/decoders/simple_csv.py
@@ -42,6 +42,7 @@ class SimpleCSV:
     ouput_columns: list
         A list of column names that will be selected to appear in the output.
     """
+
     def __init__(self, target_filename, output_columns, dialect='excel'):
         if len(output_columns) == 0:
             msg = "output_columns cannot be empty."
@@ -56,7 +57,7 @@ def __init__(self, target_filename, output_columns, dialect='excel'):
     def _get_output_path(run_info=None, outfile=None):
         """Constructs absolute path from the `target_filename` and the `run_dir` parameter
         in the `run_info` retrieved from the database.
-        
+
         Parameters
         ----------
         run_info: dict