Source code for pycbc.inference.sampler.games

""" Direct monte carlo sampling using pregenerated mapping files that
encode the intrinsic parameter space.
"""
import logging
import tqdm
import h5py
import numpy
import numpy.random
from scipy.special import logsumexp
from pycbc.io import FieldArray

from pycbc.inference import models
from pycbc.pool import choose_pool
from .dummy import DummySampler



[docs]
def call_likelihood(params):
    """ Accessor to update the global model
    """
    models._global_instance.update(**params)
    return models._global_instance.loglikelihood




[docs]
class OutOfSamples(Exception):
    """Exception if we ran out of samples"""




[docs]
class GameSampler(DummySampler):
    """Direct importance sampling using a preconstructed parameter space
    mapping file.

    Parameters
    ----------
    model : Model
        An instance of a model from ``pycbc.inference.models``.
    mapfile : str
        Path to the pre-generated file containing the pre-mapped prior volume
    loglr_region: int
        Only use regions from the prior volume tiling that are within
        this loglr difference of the maximum tile.
    target_likelihood_calls: int
        Try to use this many likelihood calls in each round of the analysis.
    rounds: int
        The number of iterations to use before terminated.
    """
    name = 'games'

    def __init__(self, model, *args, nprocesses=1, use_mpi=False,
                 mapfile=None,
                 loglr_region=25,
                 target_likelihood_calls=1e5,
                 rounds=1,
                 **kwargs):
        super().__init__(model, *args)

        self.meta = {}
        self.mapfile = mapfile
        self.rounds = int(rounds)
        self.dmap = {}
        self.draw = {}

        models._global_instance = model
        self.model = model
        self.pool = choose_pool(mpi=use_mpi, processes=nprocesses)
        self._samples = {}

        self.target_likelihood_calls = int(target_likelihood_calls)
        self.loglr_region = float(loglr_region)


[docs]
    def draw_samples_from_bin(self, i, size):
        """ Get samples from the binned prior space """
        if i not in self.draw:
            self.draw[i] = numpy.arange(0, len(self.dmap[i]))

        if size > len(self.draw[i]):
            raise OutOfSamples

        numpy.random.shuffle(self.draw[i])
        selected = self.draw[i][:size]
        self.draw[i] = self.draw[i][size:]

        if size > 0:
            remain = len(self.draw[i])
            logging.info('Drew %i, %i remains in bin %i', size, remain, i)

        return self.dmap[i][selected]



[docs]
    def sample_round(self, bin_weight, node_idx, lengths):
        """ Sample from the posterior using pre-binned sets of points and
        the weighting factor of each bin.

        bin_weight: Array
            The weighting importance factor of each bin of the prior space
        node_idx: Array
            The set of ids into the prebinned prior volume to use. This should
            map to the given weights.
        lengths: Array
            The size of each bin, used to self-normalize
        """
        logging.info("...draw template bins")
        drawcount = (bin_weight * self.target_likelihood_calls).astype(int)

        dorder = bin_weight.argsort()[::-1]
        remainder = 0
        for i in dorder:
            bincount = drawcount[i]
            binlen = lengths[i]
            if bincount > binlen:
                drawcount[i] = binlen
                remainder += bincount - binlen
            elif bincount < binlen:
                asize = min(binlen - bincount, remainder)
                drawcount[i] += asize
                remainder -= asize

        drawweight = bin_weight / drawcount
        total_draw = drawcount.sum()

        logging.info('...drawn random points within bins')
        psamp = FieldArray(total_draw, dtype=self.dtype)
        pweight = numpy.zeros(total_draw, dtype=float)
        bin_id = numpy.zeros(total_draw, dtype=int)
        j = 0
        for i, (c, w) in enumerate(zip(drawcount, drawweight)):
            bdraw = self.draw_samples_from_bin(node_idx[i], c)
            psamp[j:j+len(bdraw)] = FieldArray.from_records(bdraw,
                                                            dtype=self.dtype)
            pweight[j:j+len(bdraw)] = numpy.log(bin_weight[i]) - numpy.log(w)
            bin_id[j:j+len(bdraw)] = i
            j += len(bdraw)

        logging.info("Possible unique values %s", lengths.sum())
        logging.info("Templates drawn from %s", len(lengths))
        logging.info("Unique values first draw %s", len(psamp))

        # Calculate the likelihood values for the unique parameter space
        # points
        args = []
        for i in range(len(psamp)):
            pset = {p: psamp[p][i] for p in self.model.variable_params}
            args.append(pset)

        loglr_samp = list(tqdm.tqdm(self.pool.imap(call_likelihood, args),
                                    total=len(args)))
        loglr_samp = numpy.array(loglr_samp)

        # Calculate the weights from the actual likelihood relative to the
        # initial weights
        logw3 = loglr_samp + numpy.log(lengths[bin_id]) - pweight
        logw3 -= logsumexp(logw3)
        weight2 = numpy.exp(logw3)
        return psamp, loglr_samp, weight2, bin_id



[docs]
    def run(self):
        """ Produce posterior samples """
        logging.info('Retrieving params of parameter space nodes')
        with h5py.File(self.mapfile, 'r') as mapfile:
            bparams = {p: mapfile['bank'][p][:] for p in self.variable_params}
            num_nodes = len(bparams[list(bparams.keys())[0]])
            lengths = numpy.array([len(mapfile['map'][str(x)])
                                   for x in range(num_nodes)])
            self.dtype = mapfile['map']['0'].dtype

        logging.info('Calculating likelihood at nodes')
        args = []
        for i in range(num_nodes):
            pset = {p: bparams[p][i] for p in self.model.variable_params}
            args.append(pset)

        node_loglrs = list(tqdm.tqdm(self.pool.imap(call_likelihood, args),
                                     total=len(args)))
        node_loglrs = numpy.array(node_loglrs)
        loglr_bound = node_loglrs[~numpy.isnan(node_loglrs)].max()
        loglr_bound -= self.loglr_region

        logging.info('Drawing proposal samples from node regions')
        logw = node_loglrs + numpy.log(lengths)
        passed = (node_loglrs > loglr_bound) & ~numpy.isnan(node_loglrs)
        passed = numpy.where(passed)[0]
        logw2 = logw[passed]
        logw2 -= logsumexp(logw2)
        weight = numpy.exp(logw2)

        logging.info("...reading template bins")
        with h5py.File(self.mapfile, 'r') as mapfile:
            for i in passed:
                self.dmap[i] = mapfile['map'][str(i)][:]

        # Sample from posterior
        psamp = None
        loglr_samp = None
        weight2 = None
        bin_ids = None

        weight = lengths[passed] / lengths[passed].sum()

        for i in range(self.rounds):
            try:
                psamp_v, loglr_samp_v, weight2_v, bin_id = \
                    self.sample_round(weight / weight.sum(),
                                      passed, lengths[passed])
            except OutOfSamples:
                logging.info("No more samples to draw from")
                break

            for j, v in zip(bin_id, weight2_v):
                weight[j] += v

            if psamp is None:
                psamp = psamp_v
                loglr_samp = loglr_samp_v
                weight2 = weight2_v
                bin_ids = bin_id
            else:
                psamp = numpy.concatenate([psamp_v, psamp])
                loglr_samp = numpy.concatenate([loglr_samp_v, loglr_samp])
                weight2 = numpy.concatenate([weight2_v, weight2])
                bin_ids = numpy.concatenate([bin_id, bin_ids])

            ess = 1.0 / ((weight2/weight2.sum()) ** 2.0).sum()
            logging.info("ESS = %s", ess)

        # Prepare the equally weighted output samples
        self.meta['ncalls'] = len(weight2)
        self.meta['ess'] = ess

        weight2 /= weight2.sum()
        draw2 = numpy.random.choice(len(psamp), size=int(ess * 1),
                                    replace=True, p=weight2)
        logging.info("Unique values second draw %s",
                     len(numpy.unique(psamp[draw2])))

        fsamp = FieldArray(len(draw2), dtype=self.dtype)
        for i, v in enumerate(draw2):
            fsamp[i] = psamp[v]

        self._samples = {p: fsamp[p] for p in self.model.variable_params}
        self._samples['loglikelihood'] = loglr_samp[draw2]