# Source code for pycbc.distributions.gaussian

# Copyright (C) 2016  Christopher M. Biwer, Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
"""
This module provides classes for evaluating Gaussian distributions.
"""
import logging
import numpy
from scipy.special import erf, erfinv
import scipy.stats

from pycbc.distributions import bounded

# Module-level logger, registered under this module's dotted import path.
logger = logging.getLogger('pycbc.distributions.gaussian')


[docs]
class Gaussian(bounded.BoundedDist):
    r"""A Gaussian distribution on the given parameters; the parameters are
    independent of each other.

    Bounds can be provided on each parameter, in which case the distribution
    will be a truncated Gaussian distribution.  The PDF of a truncated
    Gaussian distribution is given by:

    .. math::
        p(x|a, b, \mu,\sigma) = \frac{1}{\sqrt{2 \pi \sigma^2}}\frac{e^{- \frac{\left( x - \mu \right)^2}{2 \sigma^2}}}{\Phi(b|\mu, \sigma) - \Phi(a|\mu, \sigma)},

    where :math:`\mu` is the mean, :math:`\sigma^2` is the variance,
    :math:`a,b` are the bounds, and :math:`\Phi` is the cumulative distribution
    of an unbounded normal distribution, given by:

    .. math::
        \Phi(x|\mu, \sigma) = \frac{1}{2}\left[1 + \mathrm{erf}\left(\frac{x-\mu}{\sigma \sqrt{2}}\right)\right].

    Note that if :math:`[a,b) = [-\infty, \infty)`, this reduces to a standard
    Gaussian distribution.

    Instances of this class can be called like a function. By default, logpdf
    will be called, but this can be changed by setting the class's __call__
    method to its pdf method.

    Parameters
    ----------
    \**params :
        The keyword arguments should provide the names of parameters and
        (optionally) some bounds, as either a tuple or a
        `boundaries.Bounds` instance. The mean and variance of each
        parameter can be provided by additional keyword arguments that have
        `_mean` and `_var` appended to the parameter name. For example,
        `foo=(-2,10), foo_mean=3, foo_var=2` would create a truncated Gaussian
        with mean 3 and variance 2, bounded between :math:`[-2, 10)`. If no
        mean or variance is provided, the distribution will have 0 mean and
        unit variance. If None is provided for the bounds, the distribution
        will be a normal, unbounded Gaussian (equivalent to setting the bounds
        to `[-inf, inf)`).

    Raises
    ------
    ValueError
        If a `_mean` or `_var` keyword is given for a parameter that was not
        itself provided.

    Examples
    --------
    Create an unbounded Gaussian distribution with zero mean and unit variance:

    >>> dist = distributions.Gaussian(mass1=None)

    Create a bounded Gaussian distribution on :math:`[1,10)` with a mean of 3
    and a variance of 2:

    >>> dist = distributions.Gaussian(mass1=(1,10), mass1_mean=3, mass1_var=2)

    Create a bounded Gaussian distribution with the same parameters, but with
    cyclic boundary conditions:

    >>> dist = distributions.Gaussian(mass1=Bounds(1,10, cyclic=True), mass1_mean=3, mass1_var=2)
    """
    name = "gaussian"

    def __init__(self, **params):
        # per-parameter normalization terms, computed once below and saved to
        # self._norm, self._lognorm, and self._expnorm
        self._bounds = {}
        self._norm = {}
        self._lognorm = {}
        self._expnorm = {}
        # pull out specified means, variances; the keys are listed first
        # because params is modified by the pops that follow
        mean_args = [p for p in params if p.endswith('_mean')]
        var_args = [p for p in params if p.endswith('_var')]
        self._mean = {p[:-len('_mean')]: params.pop(p) for p in mean_args}
        self._var = {p[:-len('_var')]: params.pop(p) for p in var_args}
        # initialize the bounds
        super(Gaussian, self).__init__(**params)

        # check that there are no params in mean/var that are not in params;
        # test the set itself (not its elements) so that a falsy name such as
        # '' (from a bare `_mean` keyword) cannot slip through
        missing = set(self._mean) - set(params)
        if missing:
            raise ValueError("means provided for unknow params {}".format(
                ', '.join(sorted(missing))))
        missing = set(self._var) - set(params)
        if missing:
            raise ValueError("vars provided for unknow params {}".format(
                ', '.join(sorted(missing))))
        # set default mean/var for params not specified
        for p in params:
            self._mean.setdefault(p, 0.)
            self._var.setdefault(p, 1.)

        # compute norms
        for p, bnds in self._bounds.items():
            sigmasq = self._var[p]
            mu = self._mean[p]
            sigma = sigmasq**0.5
            a, b = bnds
            # probability mass of the unbounded normal that lies in [a, b)
            invnorm = scipy.stats.norm.cdf(b, loc=mu, scale=sigma) \
                - scipy.stats.norm.cdf(a, loc=mu, scale=sigma)
            invnorm *= numpy.sqrt(2*numpy.pi*sigmasq)
            self._norm[p] = 1./invnorm
            self._lognorm[p] = numpy.log(self._norm[p])
            # coefficient multiplying (x - mu)^2 in the exponent
            self._expnorm[p] = -1./(2*sigmasq)

    @property
    def mean(self):
        """dict: The mean of each parameter, keyed by parameter name."""
        return self._mean

    @property
    def var(self):
        """dict: The variance of each parameter, keyed by parameter name."""
        return self._var

    def _normalcdf(self, param, value):
        """The CDF of the normal distribution, without bounds."""
        mu = self._mean[param]
        var = self._var[param]
        return 0.5*(1. + erf((value - mu)/(2*var)**0.5))

    def _boundscdf(self, param):
        """Returns the unbounded normal CDF evaluated at the lower and upper
        bounds of the given parameter, as a tuple ``(phi_a, phi_b)``.

        Infinite bounds are mapped directly to 0 (lower) and 1 (upper).
        """
        a, b = self._bounds[param]
        phi_a = self._normalcdf(param, a) if a != -numpy.inf else 0.
        phi_b = self._normalcdf(param, b) if b != numpy.inf else 1.
        return phi_a, phi_b

    def cdf(self, param, value):
        """Returns the CDF of the given parameter value.

        Parameters
        ----------
        param : str
            Name of the parameter.
        value : float or array
            Value(s) at which to evaluate the CDF.

        Returns
        -------
        float or array
            The unbounded normal CDF at `value`, shifted and rescaled by its
            values at the parameter's bounds.
        """
        phi_a, phi_b = self._boundscdf(param)
        phi_x = self._normalcdf(param, value)
        return (phi_x - phi_a)/(phi_b - phi_a)

    def _normalcdfinv(self, param, p):
        """The inverse CDF of the normal distribution, without bounds."""
        mu = self._mean[param]
        var = self._var[param]
        return mu + (2*var)**0.5 * erfinv(2*p - 1.)

    def _cdfinv_param(self, param, p):
        """Return inverse of the CDF.

        The probability `p` is first mapped onto the interval spanned by the
        unbounded normal CDF at the parameter's bounds, and the unbounded
        inverse CDF is then applied.
        """
        phi_a, phi_b = self._boundscdf(param)
        adjusted_p = phi_a + p * (phi_b - phi_a)
        return self._normalcdfinv(param, adjusted_p)

    def _pdf(self, **kwargs):
        """Returns the pdf at the given values. The keyword arguments must
        contain all of parameters in self's params. Unrecognized arguments are
        ignored.
        """
        return numpy.exp(self._logpdf(**kwargs))

    def _logpdf(self, **kwargs):
        """Returns the log of the pdf at the given values. The keyword
        arguments must contain all of parameters in self's params. Unrecognized
        arguments are ignored.

        If the values are not contained in the distribution (as determined by
        ``kwargs in self``), ``-inf`` is returned.
        """
        if kwargs not in self:
            return -numpy.inf
        return sum(self._lognorm[p] +
                   self._expnorm[p]*(kwargs[p]-self._mean[p])**2.
                   for p in self._params)

    @classmethod
    def from_config(cls, cp, section, variable_args):
        """Returns a Gaussian distribution based on a configuration file. The
        parameters for the distribution are retrieved from the section titled
        "[`section`-`variable_args`]" in the config file.

        Boundary arguments should be provided in the same way as described in
        `get_param_bounds_from_config`. In addition, the mean and variance of
        each parameter can be specified by setting `{param}_mean` and
        `{param}_var`, respectively. For example, the following would create a
        truncated Gaussian distribution between 0 and 6.28 for a parameter
        called `phi` with mean 3.14 and variance 0.5 that is cyclic:

        .. code-block:: ini

            [{section}-{tag}]
            min-phi = 0
            max-phi = 6.28
            phi_mean = 3.14
            phi_var = 0.5
            cyclic =

        Parameters
        ----------
        cp : pycbc.workflow.WorkflowConfigParser
            A parsed configuration file that contains the distribution
            options.
        section : str
            Name of the section in the configuration file.
        variable_args : str
            The names of the parameters for this distribution, separated by
            `prior.VARARGS_DELIM`. These must appear in the "tag" part
            of the section header.

        Returns
        -------
        Gaussian
            A distribution instance built by `bounded.bounded_from_config`;
            bounds are optional (`bounds_required=False`).
        """
        return bounded.bounded_from_config(cls, cp, section, variable_args,
                                           bounds_required=False)




# Names exported by a star-import of this module.
__all__ = ['Gaussian']