# Source code for pycbc.filter.autocorrelation

# Copyright (C) 2016  Christopher M. Biwer
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#
"""
This modules provides functions for calculating the autocorrelation function
and length of a data series.
"""

import numpy
from math import isnan
from pycbc.filter.matchedfilter import correlate
from pycbc.types import FrequencySeries, TimeSeries, zeros

[docs]def calculate_acf(data, delta_t=1.0, unbiased=False):
r"""Calculates the one-sided autocorrelation function.

Calculates the autocorrelation function (ACF) and returns the one-sided
ACF. The ACF is defined as the autocovariance divided by the variance. The
ACF can be estimated using

.. math::

\hat{R}(k) = \frac{1}{n \sigma^{2}} \sum_{t=1}^{n-k} \left( X_{t} - \mu \right) \left( X_{t+k} - \mu \right)

Where :math:\hat{R}(k) is the ACF, :math:X_{t} is the data series at
time t, :math:\mu is the mean of :math:X_{t}, and :math:\sigma^{2} is
the variance of :math:X_{t}.

Parameters
-----------
data : TimeSeries or numpy.array
A TimeSeries or numpy.array of data.
delta_t : float
The time step of the data series if it is not a TimeSeries instance.
unbiased : bool
If True the normalization of the autocovariance function is n-k
instead of n. This is called the unbiased estimation of the
autocovariance. Note that this does not mean the ACF is unbiased.

Returns
-------
acf : numpy.array
If data is a TimeSeries then acf will be a TimeSeries of the
one-sided ACF. Else acf is a numpy.array.
"""

# if given a TimeSeries instance then get numpy.array
if isinstance(data, TimeSeries):
y = data.numpy()
delta_t = data.delta_t
else:
y = data

# Zero mean
y = y - y.mean()
ny_orig = len(y)

while npad < 2*ny_orig:

# FFT data minus the mean
fdata = TimeSeries(ypad, delta_t=delta_t).to_frequencyseries()

# correlate
# do not need to give the congjugate since correlate function does it
cdata = FrequencySeries(zeros(len(fdata), dtype=fdata.dtype),
delta_f=fdata.delta_f, copy=False)
correlate(fdata, fdata, cdata)

# IFFT correlated data to get unnormalized autocovariance time series
acf = cdata.to_timeseries()
acf = acf[:ny_orig]

# normalize the autocovariance
# note that dividing by acf[0] is the same as ( y.var() * len(acf) )
if unbiased:
acf /= ( y.var() * numpy.arange(len(acf), 0, -1) )
else:
acf /= acf[0]

# return input datatype
if isinstance(data, TimeSeries):
return TimeSeries(acf, delta_t=delta_t)
else:
return acf

[docs]def calculate_acl(data, m=5, dtype=int):
r"""Calculates the autocorrelation length (ACL).

Given a normalized autocorrelation function :math:\rho[i] (by normalized,
we mean that :math:\rho[0] = 1), the ACL :math:\tau is:

.. math::

\tau = 1 + 2 \sum_{i=1}^{K} \rho[i].

The number of samples used :math:K is found by using the first point
such that:

.. math::

m \tau[K] \leq K,

where :math:m is a tuneable parameter (default = 5). If no such point
exists, then the given data set it too short to estimate the ACL; in this
case inf is returned.

This algorithm for computing the ACL is taken from:

N. Madras and A.D. Sokal, J. Stat. Phys. 50, 109 (1988).

Parameters
-----------
data : TimeSeries or array
A TimeSeries of data.
m : int
The number of autocorrelation lengths to use for determining the window
size :math:K (see above).
dtype : int or float
The datatype of the output. If the dtype was set to int, then the
ceiling is returned.

Returns
-------
acl : int or float
The autocorrelation length. If the ACL cannot be estimated, returns
numpy.inf.
"""

# sanity check output data type
if dtype not in [int, float]:
raise ValueError("The dtype must be either int or float.")

# if we have only a single point, just return 1
if len(data) < 2:
return 1

# calculate ACF that is normalized by the zero-lag value
acf = calculate_acf(data)

cacf = 2 * acf.numpy().cumsum() - 1
win = m * cacf <= numpy.arange(len(cacf))
if win.any():
acl = cacf[numpy.where(win)[0][0]]
if dtype == int:
acl = int(numpy.ceil(acl))
else:
acl = numpy.inf
return acl