Source code for pycbc.distributions.gaussian

# Copyright (C) 2016  Christopher M. Biwer, Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
"""
This modules provides classes for evaluating Gaussian distributions.
"""
import logging
import numpy
from scipy.special import erf, erfinv
import scipy.stats

from pycbc.distributions import bounded

logger = logging.getLogger('pycbc.distributions.gaussian')

[docs] class Gaussian(bounded.BoundedDist): r"""A Gaussian distribution on the given parameters; the parameters are independent of each other. Bounds can be provided on each parameter, in which case the distribution will be a truncated Gaussian distribution. The PDF of a truncated Gaussian distribution is given by: .. math:: p(x|a, b, \mu,\sigma) = \frac{1}{\sqrt{2 \pi \sigma^2}}\frac{e^{- \frac{\left( x - \mu \right)^2}{2 \sigma^2}}}{\Phi(b|\mu, \sigma) - \Phi(a|\mu, \sigma)}, where :math:`\mu` is the mean, :math:`\sigma^2` is the variance, :math:`a,b` are the bounds, and :math:`\Phi` is the cumulative distribution of an unbounded normal distribution, given by: .. math:: \Phi(x|\mu, \sigma) = \frac{1}{2}\left[1 + \mathrm{erf}\left(\frac{x-\mu}{\sigma \sqrt{2}}\right)\right]. Note that if :math:`[a,b) = [-\infty, \infty)`, this reduces to a standard Gaussian distribution. Instances of this class can be called like a function. By default, logpdf will be called, but this can be changed by setting the class's __call__ method to its pdf method. Parameters ---------- \**params : The keyword arguments should provide the names of parameters and (optionally) some bounds, as either a tuple or a `boundaries.Bounds` instance. The mean and variance of each parameter can be provided by additional keyword arguments that have `_mean` and `_var` adding to the parameter name. For example, `foo=(-2,10), foo_mean=3, foo_var=2` would create a truncated Gaussian with mean 3 and variance 2, bounded between :math:`[-2, 10)`. If no mean or variance is provided, the distribution will have 0 mean and unit variance. If None is provided for the bounds, the distribution will be a normal, unbounded Gaussian (equivalent to setting the bounds to `[-inf, inf)`). Examples -------- Create an unbounded Gaussian distribution with zero mean and unit variance: >>> dist = distributions.Gaussian(mass1=None) Create a bounded Gaussian distribution on :math:`[1,10)` with a mean of 3 and a variance of 2: >>> dist = distributions.Gaussian(mass1=(1,10), mass1_mean=3, mass1_var=2) Create a bounded Gaussian distribution with the same parameters, but with cyclic boundary conditions: >>> dist = distributions.Gaussian(mass1=Bounds(1,10, cyclic=True), mass1_mean=3, mass1_var=2) """ name = "gaussian" def __init__(self, **params): # save distribution parameters as dict # calculate the norm and exponential norm ahead of time # and save to self._norm, self._lognorm, and self._expnorm self._bounds = {} self._mean = {} self._var = {} self._norm = {} self._lognorm = {} self._expnorm = {} # pull out specified means, variance mean_args = [p for p in params if p.endswith('_mean')] var_args = [p for p in params if p.endswith('_var')] self._mean = dict([[p[:-5], params.pop(p)] for p in mean_args]) self._var = dict([[p[:-4], params.pop(p)] for p in var_args]) # initialize the bounds super(Gaussian, self).__init__(**params) # check that there are no params in mean/var that are not in params missing = set(self._mean.keys()) - set(params.keys()) if any(missing): raise ValueError("means provided for unknow params {}".format( ', '.join(missing))) missing = set(self._var.keys()) - set(params.keys()) if any(missing): raise ValueError("vars provided for unknow params {}".format( ', '.join(missing))) # set default mean/var for params not specified self._mean.update(dict([[p, 0.] for p in params if p not in self._mean])) self._var.update(dict([[p, 1.] for p in params if p not in self._var])) # compute norms for p,bnds in self._bounds.items(): sigmasq = self._var[p] mu = self._mean[p] a,b = bnds invnorm = scipy.stats.norm.cdf(b, loc=mu, scale=sigmasq**0.5) \ - scipy.stats.norm.cdf(a, loc=mu, scale=sigmasq**0.5) invnorm *= numpy.sqrt(2*numpy.pi*sigmasq) self._norm[p] = 1./invnorm self._lognorm[p] = numpy.log(self._norm[p]) self._expnorm[p] = -1./(2*sigmasq) @property def mean(self): return self._mean @property def var(self): return self._var def _normalcdf(self, param, value): """The CDF of the normal distribution, without bounds.""" mu = self._mean[param] var = self._var[param] return 0.5*(1. + erf((value - mu)/(2*var)**0.5))
[docs] def cdf(self, param, value): """Returns the CDF of the given parameter value.""" a, b = self._bounds[param] if a != -numpy.inf: phi_a = self._normalcdf(param, a) else: phi_a = 0. if b != numpy.inf: phi_b = self._normalcdf(param, b) else: phi_b = 1. phi_x = self._normalcdf(param, value) return (phi_x - phi_a)/(phi_b - phi_a)
def _normalcdfinv(self, param, p): """The inverse CDF of the normal distribution, without bounds.""" mu = self._mean[param] var = self._var[param] return mu + (2*var)**0.5 * erfinv(2*p - 1.) def _cdfinv_param(self, param, p): """Return inverse of the CDF. """ a, b = self._bounds[param] if a != -numpy.inf: phi_a = self._normalcdf(param, a) else: phi_a = 0. if b != numpy.inf: phi_b = self._normalcdf(param, b) else: phi_b = 1. adjusted_p = phi_a + p * (phi_b - phi_a) return self._normalcdfinv(param, adjusted_p) def _pdf(self, **kwargs): """Returns the pdf at the given values. The keyword arguments must contain all of parameters in self's params. Unrecognized arguments are ignored. """ return numpy.exp(self._logpdf(**kwargs)) def _logpdf(self, **kwargs): """Returns the log of the pdf at the given values. The keyword arguments must contain all of parameters in self's params. Unrecognized arguments are ignored. """ if kwargs in self: return sum([self._lognorm[p] + self._expnorm[p]*(kwargs[p]-self._mean[p])**2. for p in self._params]) else: return -numpy.inf
[docs] @classmethod def from_config(cls, cp, section, variable_args): """Returns a Gaussian distribution based on a configuration file. The parameters for the distribution are retrieved from the section titled "[`section`-`variable_args`]" in the config file. Boundary arguments should be provided in the same way as described in `get_param_bounds_from_config`. In addition, the mean and variance of each parameter can be specified by setting `{param}_mean` and `{param}_var`, respectively. For example, the following would create a truncated Gaussian distribution between 0 and 6.28 for a parameter called `phi` with mean 3.14 and variance 0.5 that is cyclic: .. code-block:: ini [{section}-{tag}] min-phi = 0 max-phi = 6.28 phi_mean = 3.14 phi_var = 0.5 cyclic = Parameters ---------- cp : pycbc.workflow.WorkflowConfigParser A parsed configuration file that contains the distribution options. section : str Name of the section in the configuration file. variable_args : str The names of the parameters for this distribution, separated by `prior.VARARGS_DELIM`. These must appear in the "tag" part of the section header. Returns ------- Gaussian A distribution instance from the pycbc.inference.prior module. """ return bounded.bounded_from_config(cls, cp, section, variable_args, bounds_required=False)
__all__ = ['Gaussian']