Source code for pycbc.inference.sampler.games

""" Direct monte carlo sampling using pregenerated mapping files that
encode the intrinsic parameter space.
"""
import logging
import tqdm
import h5py
import numpy
import numpy.random
from scipy.special import logsumexp
from pycbc.io import FieldArray

from pycbc.inference import models
from pycbc.pool import choose_pool
from .dummy import DummySampler


[docs] def call_likelihood(params): """ Accessor to update the global model """ models._global_instance.update(**params) return models._global_instance.loglikelihood
[docs] class OutOfSamples(Exception): """Exception if we ran out of samples"""
[docs] class GameSampler(DummySampler): """Direct importance sampling using a preconstructed parameter space mapping file. Parameters ---------- model : Model An instance of a model from ``pycbc.inference.models``. mapfile : str Path to the pre-generated file containing the pre-mapped prior volume loglr_region: int Only use regions from the prior volume tiling that are within this loglr difference of the maximum tile. target_likelihood_calls: int Try to use this many likelihood calls in each round of the analysis. rounds: int The number of iterations to use before terminated. """ name = 'games' def __init__(self, model, *args, nprocesses=1, use_mpi=False, mapfile=None, loglr_region=25, target_likelihood_calls=1e5, rounds=1, **kwargs): super().__init__(model, *args) self.meta = {} self.mapfile = mapfile self.rounds = int(rounds) self.dmap = {} self.draw = {} models._global_instance = model self.model = model self.pool = choose_pool(mpi=use_mpi, processes=nprocesses) self._samples = {} self.target_likelihood_calls = int(target_likelihood_calls) self.loglr_region = float(loglr_region)
[docs] def draw_samples_from_bin(self, i, size): """ Get samples from the binned prior space """ if i not in self.draw: self.draw[i] = numpy.arange(0, len(self.dmap[i])) if size > len(self.draw[i]): raise OutOfSamples numpy.random.shuffle(self.draw[i]) selected = self.draw[i][:size] self.draw[i] = self.draw[i][size:] if size > 0: remain = len(self.draw[i]) logging.info('Drew %i, %i remains in bin %i', size, remain, i) return self.dmap[i][selected]
[docs] def sample_round(self, bin_weight, node_idx, lengths): """ Sample from the posterior using pre-binned sets of points and the weighting factor of each bin. bin_weight: Array The weighting importance factor of each bin of the prior space node_idx: Array The set of ids into the prebinned prior volume to use. This should map to the given weights. lengths: Array The size of each bin, used to self-normalize """ logging.info("...draw template bins") drawcount = (bin_weight * self.target_likelihood_calls).astype(int) dorder = bin_weight.argsort()[::-1] remainder = 0 for i in dorder: bincount = drawcount[i] binlen = lengths[i] if bincount > binlen: drawcount[i] = binlen remainder += bincount - binlen elif bincount < binlen: asize = min(binlen - bincount, remainder) drawcount[i] += asize remainder -= asize drawweight = bin_weight / drawcount total_draw = drawcount.sum() logging.info('...drawn random points within bins') psamp = FieldArray(total_draw, dtype=self.dtype) pweight = numpy.zeros(total_draw, dtype=float) bin_id = numpy.zeros(total_draw, dtype=int) j = 0 for i, (c, w) in enumerate(zip(drawcount, drawweight)): bdraw = self.draw_samples_from_bin(node_idx[i], c) psamp[j:j+len(bdraw)] = FieldArray.from_records(bdraw, dtype=self.dtype) pweight[j:j+len(bdraw)] = numpy.log(bin_weight[i]) - numpy.log(w) bin_id[j:j+len(bdraw)] = i j += len(bdraw) logging.info("Possible unique values %s", lengths.sum()) logging.info("Templates drawn from %s", len(lengths)) logging.info("Unique values first draw %s", len(psamp)) # Calculate the likelihood values for the unique parameter space # points args = [] for i in range(len(psamp)): pset = {p: psamp[p][i] for p in self.model.variable_params} args.append(pset) loglr_samp = list(tqdm.tqdm(self.pool.imap(call_likelihood, args), total=len(args))) loglr_samp = numpy.array(loglr_samp) # Calculate the weights from the actual likelihood relative to the # initial weights logw3 = loglr_samp + numpy.log(lengths[bin_id]) - pweight logw3 -= logsumexp(logw3) weight2 = numpy.exp(logw3) return psamp, loglr_samp, weight2, bin_id
[docs] def run(self): """ Produce posterior samples """ logging.info('Retrieving params of parameter space nodes') with h5py.File(self.mapfile, 'r') as mapfile: bparams = {p: mapfile['bank'][p][:] for p in self.variable_params} num_nodes = len(bparams[list(bparams.keys())[0]]) lengths = numpy.array([len(mapfile['map'][str(x)]) for x in range(num_nodes)]) self.dtype = mapfile['map']['0'].dtype logging.info('Calculating likelihood at nodes') args = [] for i in range(num_nodes): pset = {p: bparams[p][i] for p in self.model.variable_params} args.append(pset) node_loglrs = list(tqdm.tqdm(self.pool.imap(call_likelihood, args), total=len(args))) node_loglrs = numpy.array(node_loglrs) loglr_bound = node_loglrs[~numpy.isnan(node_loglrs)].max() loglr_bound -= self.loglr_region logging.info('Drawing proposal samples from node regions') logw = node_loglrs + numpy.log(lengths) passed = (node_loglrs > loglr_bound) & ~numpy.isnan(node_loglrs) passed = numpy.where(passed)[0] logw2 = logw[passed] logw2 -= logsumexp(logw2) weight = numpy.exp(logw2) logging.info("...reading template bins") with h5py.File(self.mapfile, 'r') as mapfile: for i in passed: self.dmap[i] = mapfile['map'][str(i)][:] # Sample from posterior psamp = None loglr_samp = None weight2 = None bin_ids = None weight = lengths[passed] / lengths[passed].sum() for i in range(self.rounds): try: psamp_v, loglr_samp_v, weight2_v, bin_id = \ self.sample_round(weight / weight.sum(), passed, lengths[passed]) except OutOfSamples: logging.info("No more samples to draw from") break for j, v in zip(bin_id, weight2_v): weight[j] += v if psamp is None: psamp = psamp_v loglr_samp = loglr_samp_v weight2 = weight2_v bin_ids = bin_id else: psamp = numpy.concatenate([psamp_v, psamp]) loglr_samp = numpy.concatenate([loglr_samp_v, loglr_samp]) weight2 = numpy.concatenate([weight2_v, weight2]) bin_ids = numpy.concatenate([bin_id, bin_ids]) ess = 1.0 / ((weight2/weight2.sum()) ** 2.0).sum() logging.info("ESS = %s", ess) # Prepare the equally weighted output samples self.meta['ncalls'] = len(weight2) self.meta['ess'] = ess weight2 /= weight2.sum() draw2 = numpy.random.choice(len(psamp), size=int(ess * 1), replace=True, p=weight2) logging.info("Unique values second draw %s", len(numpy.unique(psamp[draw2]))) fsamp = FieldArray(len(draw2), dtype=self.dtype) for i, v in enumerate(draw2): fsamp[i] = psamp[v] self._samples = {p: fsamp[p] for p in self.model.variable_params} self._samples['loglikelihood'] = loglr_samp[draw2]