
# Copyright (C) 2018  Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

"""
This module provides models that have analytic solutions for the
log likelihood.
"""

import logging
import numpy
import numpy.random
from scipy import stats

from .base import BaseModel


class TestNormal(BaseModel):
    r"""The test distribution is a multivariate normal distribution.

    The number of dimensions is set by the number of ``variable_params`` that
    are passed. For details on the distribution used, see
    ``scipy.stats.multivariate_normal``.

    Parameters
    ----------
    variable_params : (tuple of) string(s)
        A tuple of parameter names that will be varied.
    mean : array-like, optional
        The mean values of the parameters. If None provided, will use 0 for
        all parameters.
    cov : array-like, optional
        The covariance matrix of the parameters. If None provided, will use
        unit variance for all parameters, with cross-terms set to 0.
    **kwargs :
        All other keyword arguments are passed to ``BaseModel``.

    Examples
    --------
    Create a 2D model with zero mean and unit variance:

    >>> m = TestNormal(['x', 'y'])

    Set the current parameters and evaluate the log posterior:

    >>> m.update(x=-0.2, y=0.1)
    >>> m.logposterior
    -1.8628770664093453

    See the current stats that were evaluated:

    >>> m.current_stats
    {'logjacobian': 0.0, 'loglikelihood': -1.8628770664093453, 'logprior': 0.0}
    """
    name = "test_normal"

    def __init__(self, variable_params, mean=None, cov=None, **kwargs):
        # set up base likelihood parameters
        super(TestNormal, self).__init__(variable_params, **kwargs)
        # store the pdf
        if mean is None:
            mean = [0.]*len(variable_params)
        if cov is None:
            cov = [1.]*len(variable_params)
        self._dist = stats.multivariate_normal(mean=mean, cov=cov)
        # check that the dimension is correct
        if self._dist.dim != len(variable_params):
            raise ValueError("dimension mis-match between variable_params "
                             "and mean and/or cov")

    def _loglikelihood(self):
        """Returns the log pdf of the multivariate normal.
        """
        return self._dist.logpdf([self.current_params[p]
                                  for p in self.variable_params])
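
# An additional usage sketch (editorial addition; values illustrative only):
# a correlated 2D normal with non-zero mean. At the mean the log pdf is
# -0.5*(2*ln(2*pi) + ln(det(cov))), roughly -2.118 for the covariance below.
#
#   >>> m = TestNormal(['x', 'y'], mean=[1., -1.], cov=[[2., 0.5], [0.5, 1.]])
#   >>> m.update(x=1., y=-1.)
#   >>> m.loglikelihood   # approximately -2.118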

class TestEggbox(BaseModel):
    r"""The test distribution is an 'eggbox' function:

    .. math::

        \log \mathcal{L}(\Theta) = \left[
            2+\prod_{i=1}^{n}\cos\left(\frac{\theta_{i}}{2}\right)\right]^{5}

    The number of dimensions is set by the number of ``variable_params`` that
    are passed.

    Parameters
    ----------
    variable_params : (tuple of) string(s)
        A tuple of parameter names that will be varied.
    **kwargs :
        All other keyword arguments are passed to ``BaseModel``.
    """
    name = "test_eggbox"

    def __init__(self, variable_params, **kwargs):
        # set up base likelihood parameters
        super(TestEggbox, self).__init__(variable_params, **kwargs)

    def _loglikelihood(self):
        """Returns the log pdf of the eggbox function.
        """
        return (2 + numpy.prod(numpy.cos([
            self.current_params[p]/2. for p in self.variable_params]))) ** 5
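
# A short usage sketch (editorial addition, following the doctest pattern in
# TestNormal above; the ``update`` and ``loglikelihood`` attributes are assumed
# to be inherited from ``BaseModel``). At the origin every cosine factor is 1,
# so the log likelihood is (2 + 1)**5 = 243.
#
#   >>> m = TestEggbox(['x', 'y'])
#   >>> m.update(x=0., y=0.)
#   >>> m.loglikelihood   # (2 + 1)**5 = 243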

class TestRosenbrock(BaseModel):
    r"""The test distribution is the Rosenbrock function:

    .. math::

        \log \mathcal{L}(\Theta) = -\sum_{i=1}^{n-1}[
            (1-\theta_{i})^{2}+100(\theta_{i+1} - \theta_{i}^{2})^{2}]

    The number of dimensions is set by the number of ``variable_params`` that
    are passed.

    Parameters
    ----------
    variable_params : (tuple of) string(s)
        A tuple of parameter names that will be varied.
    **kwargs :
        All other keyword arguments are passed to ``BaseModel``.
    """
    name = "test_rosenbrock"

    def __init__(self, variable_params, **kwargs):
        # set up base likelihood parameters
        super(TestRosenbrock, self).__init__(variable_params, **kwargs)

    def _loglikelihood(self):
        """Returns the log pdf of the Rosenbrock function.
        """
        logl = 0
        p = [self.current_params[p] for p in self.variable_params]
        for i in range(len(p) - 1):
            logl -= ((1 - p[i])**2 + 100 * (p[i+1] - p[i]**2)**2)
        return logl
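
# A short usage sketch (editorial addition; usage pattern as in TestNormal):
# the Rosenbrock log likelihood has its global maximum of 0 at theta_i = 1 for
# all i, at the end of a long curved valley that samplers find hard to follow.
#
#   >>> m = TestRosenbrock(['x', 'y'])
#   >>> m.update(x=1., y=1.)
#   >>> m.loglikelihood   # 0 at the global maximum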

class TestVolcano(BaseModel):
    r"""The test distribution is a two-dimensional 'volcano' function:

    .. math::

        \Theta = \sqrt{\theta_{1}^{2} + \theta_{2}^{2}}

    .. math::

        \log \mathcal{L}(\Theta) = 25\left(e^{\frac{-\Theta}{35}} +
            \frac{1}{2\sqrt{2\pi}} e^{-\frac{(\Theta-5)^{2}}{8}}\right)

    Parameters
    ----------
    variable_params : (tuple of) string(s)
        A tuple of parameter names that will be varied. Must have length 2.
    **kwargs :
        All other keyword arguments are passed to ``BaseModel``.
    """
    name = "test_volcano"

    def __init__(self, variable_params, **kwargs):
        # set up base likelihood parameters
        super(TestVolcano, self).__init__(variable_params, **kwargs)

        # make sure there are exactly two variable args
        if len(self.variable_params) != 2:
            raise ValueError("TestVolcano distribution requires exactly "
                             "two variable args")

    def _loglikelihood(self):
        """Returns the log pdf of the 2D volcano function.
        """
        p = [self.current_params[p] for p in self.variable_params]
        r = numpy.sqrt(p[0]**2 + p[1]**2)
        mu, sigma = 5.0, 2.0
        return 25 * (
            numpy.exp(-r/35) + 1 / (sigma * numpy.sqrt(2 * numpy.pi)) *
            numpy.exp(-0.5 * ((r - mu) / sigma) ** 2))
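
# A short usage sketch (editorial addition; usage pattern as in TestNormal):
# the 'volcano' peaks on a ring of radius ~5 about the origin. At
# (x, y) = (3, 4), r = 5, so the log likelihood is
# 25*(exp(-5/35) + 1/(2*sqrt(2*pi))), roughly 26.66.
#
#   >>> m = TestVolcano(['x', 'y'])
#   >>> m.update(x=3., y=4.)
#   >>> m.loglikelihood   # approximately 26.66, on the ring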

class TestPrior(BaseModel):
    r"""Uses the prior as the test distribution.

    Parameters
    ----------
    variable_params : (tuple of) string(s)
        A tuple of parameter names that will be varied.
    **kwargs :
        All other keyword arguments are passed to ``BaseModel``.
    """
    name = "test_prior"

    def __init__(self, variable_params, **kwargs):
        # set up base likelihood parameters
        super(TestPrior, self).__init__(variable_params, **kwargs)

    def _loglikelihood(self):
        """Returns zero.
        """
        return 0.

class TestPosterior(BaseModel):
    r"""Build a test posterior from a set of samples using a kde.

    Parameters
    ----------
    variable_params : (tuple of) string(s)
        A tuple of parameter names that will be varied.
    posterior_file : hdf file
        A compatible pycbc inference output file from which posterior samples
        can be read.
    nsamples : int
        Number of samples to draw from the posterior file to build the KDE.
    **kwargs :
        All other keyword arguments are passed to ``BaseModel``.
    """
    name = "test_posterior"

    def __init__(self, variable_params, posterior_file, nsamples, **kwargs):
        super(TestPosterior, self).__init__(variable_params, **kwargs)

        from pycbc.inference.io import loadfile  # avoid cyclic import

        logging.info('loading test posterior model')
        inf_file = loadfile(posterior_file)
        logging.info('reading samples')
        samples = inf_file.read_samples(variable_params)
        samples = numpy.array([samples[v] for v in variable_params])

        # choose only the requested number of samples
        idx = numpy.arange(0, samples.shape[-1])
        idx = numpy.random.choice(idx, size=int(nsamples), replace=False)
        samples = samples[:, idx]

        logging.info('making kde with %s samples', samples.shape[-1])
        self.kde = stats.gaussian_kde(samples)
        logging.info('done initializing test posterior model')

    def _loglikelihood(self):
        """Returns the log pdf of the test posterior kde.
        """
        p = numpy.array([self.current_params[p]
                         for p in self.variable_params])
        logpost = self.kde.logpdf(p)
        return float(logpost[0])
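
# A short usage sketch (editorial addition): build a KDE-based test posterior
# from an existing PyCBC Inference output file. The file name, parameter
# names, and values below are hypothetical placeholders.
#
#   >>> m = TestPosterior(['mass1', 'mass2'],
#   ...                   posterior_file='posterior_samples.hdf',
#   ...                   nsamples=1000)
#   >>> m.update(mass1=35., mass2=30.)
#   >>> m.loglikelihood   # log pdf of the KDE at the current parameters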