Source code for pycbc.inference.sampler.nessai

"""
This modules provides class for using the nessai sampler package for parameter
estimation.

Documentation for nessai: https://nessai.readthedocs.io/en/latest/
"""
import ast
import logging
import os

import nessai.flowsampler
import nessai.model
import nessai.livepoint
import nessai.utils.multiprocessing
import nessai.utils.settings
import numpy
import numpy.lib.recfunctions as rfn

from .base import BaseSampler, setup_output
from .base_mcmc import get_optional_arg_from_config
from ..io import NessaiFile, loadfile
from ...pool import choose_pool


[docs] class NessaiSampler(BaseSampler): """Class to construct a FlowSampler from the nessai package.""" name = "nessai" _io = NessaiFile def __init__( self, model, loglikelihood_function, nlive=1000, nprocesses=1, use_mpi=False, run_kwds=None, extra_kwds=None, ): super().__init__(model) self.nlive = nlive self.model_call = NessaiModel(self.model, loglikelihood_function) self.extra_kwds = extra_kwds if extra_kwds is not None else {} self.run_kwds = run_kwds if run_kwds is not None else {} nessai.utils.multiprocessing.initialise_pool_variables(self.model_call) self.pool = choose_pool(mpi=use_mpi, processes=nprocesses) self.nprocesses = nprocesses self._sampler = None self._nested_samples = None self._posterior_samples = None self._logz = None self._dlogz = None self.checkpoint_file = None self.resume_data = None @property def io(self): return self._io @property def model_stats(self): pass @property def samples(self): """The raw nested samples including the corresponding weights""" if self._sampler.ns.nested_samples: ns = numpy.array(self._sampler.ns.nested_samples) samples = nessai.livepoint.live_points_to_dict( ns, self.model.sampling_params, ) samples["logwt"] = self._sampler.ns.state.log_posterior_weights samples["loglikelihood"] = ns["logL"] samples["logprior"] = ns["logP"] samples["it"] = ns["it"] else: samples = {} return samples
[docs] def run(self, **kwargs): """Run the sampler""" default_kwds, default_run_kwds = self.get_default_kwds( importance_nested_sampler=self.extra_kwds.get( "importance_nested_sampler", False ) ) extra_kwds = self.extra_kwds.copy() run_kwds = self.run_kwds.copy() # Output in kwargs takes priority of extra kwds. output = kwargs.pop("output", extra_kwds.pop("output", None)) # If neither have been specified, use the path from the checkpoint file if output is None: output = os.path.join( os.path.dirname(os.path.abspath(self.checkpoint_file)), "outdir_nessai", ) if kwargs is not None: logging.info("Updating keyword arguments with %s", kwargs) extra_kwds.update( {k: v for k, v in kwargs.items() if k in default_kwds} ) run_kwds.update( {k: v for k, v in kwargs.items() if k in default_run_kwds} ) if self._sampler is None: logging.info("Initialising nessai FlowSampler") self._sampler = nessai.flowsampler.FlowSampler( self.model_call, output=output, pool=self.pool, n_pool=self.nprocesses, close_pool=False, signal_handling=False, resume_data=self.resume_data, checkpoint_callback=self.checkpoint_callback, **extra_kwds, ) logging.info("Starting sampling with nessai") self._sampler.run(**run_kwds)
[docs] @staticmethod def get_default_kwds(importance_nested_sampler=False): """Return lists of all allowed keyword arguments for nessai. Returns ------- default_kwds : list List of keyword arguments that can be passed to FlowSampler run_kwds: list List of keyword arguments that can be passed to FlowSampler.run """ return nessai.utils.settings.get_all_kwargs( importance_nested_sampler=importance_nested_sampler, split_kwargs=True, )
[docs] @classmethod def from_config( cls, cp, model, output_file=None, nprocesses=1, use_mpi=False ): """ Loads the sampler from the given config file. """ section = "sampler" # check name assert ( cp.get(section, "name") == cls.name ), "name in section [sampler] must match mine" if cp.has_option(section, "importance_nested_sampler"): importance_nested_sampler = cp.get( section, "importance_nested_sampler", ) else: importance_nested_sampler = False # Requires additional development work, see the model class below if importance_nested_sampler is True: raise NotImplementedError( "Importance nested sampler is not currently supported" ) default_kwds, default_run_kwds = cls.get_default_kwds( importance_nested_sampler ) # Keyword arguments the user cannot configure via the config remove_kwds = [ "pool", "n_pool", "close_pool", "signal_handling", "checkpoint_callback", ] for kwd in remove_kwds: default_kwds.pop(kwd, None) default_run_kwds.pop(kwd, None) kwds = {} run_kwds = {} # ast.literal_eval is used here since specifying a dictionary with all # various types would be difficult. However, one may wish to revisit # this in future, e.g. if evaluating code is a concern. for d_out, d_defaults in zip( [kwds, run_kwds], [default_kwds, default_run_kwds] ): for k in d_defaults.keys(): if cp.has_option(section, k): option = cp.get(section, k) try: # This will fail for e.g. a string with an underscore option = ast.literal_eval(option) except ValueError: pass d_out[k] = option # Specified kwds ignore_kwds = {"nlive", "name"} invalid_kwds = ( cp[section].keys() - set().union(kwds.keys(), run_kwds.keys()) - ignore_kwds ) if invalid_kwds: raise RuntimeError( f"Config contains unknown options: {invalid_kwds}" ) logging.info("nessai keyword arguments: %s", kwds) logging.info("nessai run keyword arguments: %s", run_kwds) loglikelihood_function = get_optional_arg_from_config( cp, section, "loglikelihood-function" ) obj = cls( model, loglikelihood_function=loglikelihood_function, nprocesses=nprocesses, use_mpi=use_mpi, run_kwds=run_kwds, extra_kwds=kwds, ) # Do not need to check number of samples for a nested sampler setup_output(obj, output_file, check_nsamples=False) if not obj.new_checkpoint: obj.resume_from_checkpoint() return obj
[docs] def set_initial_conditions( self, initial_distribution=None, samples_file=None, ): """Sets up the starting point for the sampler. This is not used for nessai. """
[docs] def checkpoint_callback(self, state): """Callback for checkpointing. This will be called periodically by nessai. """ for fn in [self.checkpoint_file, self.backup_file]: with self.io(fn, "a") as fp: fp.write_pickled_data_into_checkpoint_file(state) self.write_results(fn)
[docs] def checkpoint(self): """Checkpoint the sampler""" self.checkpoint_callback(self._sampler.ns)
[docs] def resume_from_checkpoint(self): """Reads the resume data from the checkpoint file.""" try: with loadfile(self.checkpoint_file, "r") as fp: self.resume_data = fp.read_pickled_data_from_checkpoint_file() logging.info( "Found valid checkpoint file: %s", self.checkpoint_file ) except Exception as e: logging.info("Failed to load checkpoint file with error: %s", e)
[docs] def finalize(self): """Finalize sampling""" logz = self._sampler.ns.log_evidence dlogz = self._sampler.ns.log_evidence_error logging.info("log Z, dlog Z: %s, %s", logz, dlogz) self.checkpoint()
[docs] def write_results(self, filename): """Write the results to a given file. Writes the nested samples, log-evidence and log-evidence error. """ with self.io(filename, "a") as fp: fp.write_raw_samples(self.samples) fp.write_logevidence( self._sampler.ns.log_evidence, self._sampler.ns.log_evidence_error, )
[docs] class NessaiModel(nessai.model.Model): """Wrapper for PyCBC Inference model class for use with nessai. Parameters ---------- model : inference.BaseModel instance A model instance from PyCBC. loglikelihood_function : str Name of the log-likelihood method to call. """ def __init__(self, model, loglikelihood_function=None): self.model = model self.names = list(model.sampling_params) # Configure the log-likelihood function if loglikelihood_function is None: loglikelihood_function = "loglikelihood" self.loglikelihood_function = loglikelihood_function # Configure the priors bounds bounds = {} for dist in model.prior_distribution.distributions: bounds.update( **{ k: [v.min, v.max] for k, v in dist.bounds.items() if k in self.names } ) self.bounds = bounds # Prior and likelihood are not vectorised self.vectorised_likelihood = False self.vectorised_prior = False # Use the pool for computing the prior self.parallelise_prior = True
[docs] def to_dict(self, x): """Convert a nessai live point array to a dictionary""" return {n: x[n].item() for n in self.names}
[docs] def to_live_points(self, x): """Convert to the structured arrays used by nessai""" # It is possible this could be made faster return nessai.livepoint.numpy_array_to_live_points( rfn.structured_to_unstructured(x), self.names, )
[docs] def new_point(self, N=1): """Draw a new point""" return self.to_live_points(self.model.prior_rvs(size=N))
[docs] def new_point_log_prob(self, x): """Log-probability for the ``new_point`` method""" return self.batch_evaluate_log_prior(x)
[docs] def log_prior(self, x): """Compute the log-prior""" self.model.update(**self.to_dict(x)) return self.model.logprior
[docs] def log_likelihood(self, x): """Compute the log-likelihood""" self.model.update(**self.to_dict(x)) return getattr(self.model, self.loglikelihood_function)
[docs] def from_unit_hypercube(self, x): """Map from the unit-hypercube to the prior.""" # Needs to be implemented for importance nested sampler # This method is already available in pycbc but the inverse is not raise NotImplementedError
[docs] def to_unit_hypercube(self, x): """Map to the unit-hypercube to the prior.""" # Needs to be implemented for importance nested sampler raise NotImplementedError