# Copyright (C) 2012 Tito Dal Canton
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Utilities to estimate PSDs from data.
"""
import numpy
from pycbc.types import Array, FrequencySeries, TimeSeries, zeros
from pycbc.types import real_same_precision_as, complex_same_precision_as
from pycbc.fft import fft, ifft
# Change to True in front-end if you want this function to use caching
# This is a mostly-hidden optimization option that most users will not want
# to use. It is used in PyCBC Live
USE_CACHING_FOR_WELCH_FFTS = False
USE_CACHING_FOR_INV_SPEC_TRUNC = False
# If using caching we want output to be unique if called at different places
# (and if called from different modules/functions), these unique IDs achieve
# that. The numbers are not significant, only that they are unique.
WELCH_UNIQUE_ID = 438716587
INVSPECTRUNC_UNIQUE_ID = 100257896
[docs]
def welch(timeseries, seg_len=4096, seg_stride=2048, window='hann',
          avg_method='median', num_segments=None, require_exact_data_fit=False):
    """PSD estimator based on Welch's method.

    Parameters
    ----------
    timeseries : TimeSeries
        Time series for which the PSD is to be estimated.
    seg_len : int
        Segment length in samples.
    seg_stride : int
        Separation between consecutive segments, in samples.
    window : {'hann', numpy.ndarray}
        Function used to window segments before Fourier transforming, or
        a `numpy.ndarray` that specifies the window.
    avg_method : {'median', 'mean', 'median-mean'}
        Method used for averaging individual segment PSDs.
    num_segments : {None, int}
        Number of segments to average. If None, it is derived from
        len(`timeseries`) and `seg_stride`, reduced by one if the last
        segment would run past the end of the data.
    require_exact_data_fit : bool
        If False (default), data in excess of what the segmentation needs
        is trimmed from both ends of `timeseries` (the extra sample is
        removed from the start when the excess is odd). If True, no
        trimming is done and the data length must match the segmentation
        exactly.

    Returns
    -------
    psd : FrequencySeries
        Frequency series containing the estimated PSD.

    Raises
    ------
    ValueError
        For invalid choices of `seg_len`, `seg_stride` `window` and
        `avg_method` and for inconsistent combinations of len(`timeseries`),
        `seg_len` and `seg_stride`. When more data is requested than is
        available, the message reports both sample counts.

    Notes
    -----
    See arXiv:gr-qc/0509116 for details.
    """
    window_map = {
        'hann': numpy.hanning
    }

    # sanity checks
    if isinstance(window, numpy.ndarray) and window.size != seg_len:
        raise ValueError('Invalid window: incorrect window length')
    if not isinstance(window, numpy.ndarray) and window not in window_map:
        raise ValueError('Invalid window: unknown window {!r}'.format(window))
    if avg_method not in ('mean', 'median', 'median-mean'):
        raise ValueError('Invalid averaging method')
    if type(seg_len) is not int or type(seg_stride) is not int \
            or seg_len <= 0 or seg_stride <= 0:
        raise ValueError('Segment length and stride must be positive integers')

    if timeseries.precision == 'single':
        fs_dtype = numpy.complex64
    elif timeseries.precision == 'double':
        fs_dtype = numpy.complex128

    num_samples = len(timeseries)
    if num_segments is None:
        num_segments = int(num_samples // seg_stride)
        # When seg_len >= 2 * seg_stride (as with the defaults) this
        # condition always holds, so the count is always reduced by one.
        # NOTE(review): only a single decrement is applied; when seg_len
        # exceeds 2 * seg_stride the count can still be too large and the
        # data-length error below is raised -- confirm this is intended.
        if (num_segments - 1) * seg_stride + seg_len > num_samples:
            num_segments -= 1

    if not require_exact_data_fit:
        data_len = (num_segments - 1) * seg_stride + seg_len

        # Get the correct amount of data
        if data_len < num_samples:
            diff = num_samples - data_len
            start = diff // 2
            end = num_samples - diff // 2
            # Want this to be integers so if diff is odd, catch it here.
            if diff % 2:
                start = start + 1

            timeseries = timeseries[start:end]
            num_samples = len(timeseries)
        if data_len > num_samples:
            err_msg = "I was asked to estimate a PSD on %d " % (data_len)
            err_msg += "data samples. However the data provided only contains "
            err_msg += "%d data samples." % (num_samples)
            # Previously this message was built but never raised, so the
            # caller only ever saw the generic segmentation error below.
            raise ValueError(err_msg)

    if num_samples != (num_segments - 1) * seg_stride + seg_len:
        raise ValueError('Incorrect choice of segmentation parameters')

    if not isinstance(window, numpy.ndarray):
        window = window_map[window](seg_len)
    w = Array(window.astype(timeseries.dtype))

    # Imported here, after argument validation, so that invalid input is
    # reported before the import is attempted.
    from pycbc.strain.strain import execute_cached_fft

    # calculate psd of each segment
    delta_f = 1. / timeseries.delta_t / seg_len
    if not USE_CACHING_FOR_WELCH_FFTS:
        # Output buffer reused by every segment FFT in the loop below.
        segment_tilde = FrequencySeries(
            numpy.zeros(seg_len // 2 + 1),
            delta_f=delta_f,
            dtype=fs_dtype,
        )

    segment_psds = []
    for i in range(num_segments):
        segment_start = i * seg_stride
        segment_end = segment_start + seg_len
        segment = timeseries[segment_start:segment_end]
        assert len(segment) == seg_len
        if not USE_CACHING_FOR_WELCH_FFTS:
            fft(segment * w, segment_tilde)
        else:
            segment_tilde = execute_cached_fft(segment * w,
                                               uid=WELCH_UNIQUE_ID)
        seg_psd = abs(segment_tilde * segment_tilde.conj()).numpy()

        # halve the DC and Nyquist components to be consistent with TO10095
        seg_psd[0] /= 2
        seg_psd[-1] /= 2

        segment_psds.append(seg_psd)

    segment_psds = numpy.array(segment_psds)

    if avg_method == 'mean':
        psd = numpy.mean(segment_psds, axis=0)
    elif avg_method == 'median':
        psd = numpy.median(segment_psds, axis=0) / median_bias(num_segments)
    elif avg_method == 'median-mean':
        # Median of every other segment, each corrected for its own bias,
        # then averaged.
        odd_psds = segment_psds[::2]
        even_psds = segment_psds[1::2]
        odd_median = numpy.median(odd_psds, axis=0) / \
            median_bias(len(odd_psds))
        even_median = numpy.median(even_psds, axis=0) / \
            median_bias(len(even_psds))
        psd = (odd_median + even_median) / 2

    # Normalise by the window power.
    w = w.numpy()
    psd *= 2 * delta_f * seg_len / (w*w).sum()

    return FrequencySeries(psd, delta_f=delta_f, dtype=timeseries.dtype,
                           epoch=timeseries.start_time)
[docs]
def inverse_spectrum_truncation(psd, max_filter_len, low_frequency_cutoff=None, trunc_method=None):
    """Smooth a PSD by limiting the duration of its inverse-ASD filter.

    The inverse square root of the PSD is transformed to the time domain,
    the samples outside the first and last `max_filter_len // 2` samples
    are set to zero, and the result is transformed back and inverted to
    give the output PSD.

    Parameters
    ----------
    psd : FrequencySeries
        PSD whose inverse spectrum is to be truncated.
    max_filter_len : int
        Maximum length of the time-domain filter in samples.
    low_frequency_cutoff : {None, int}
        The inverse ASD is only filled in from bin
        `int(low_frequency_cutoff / psd.delta_f)` upwards; bins below it
        are left at zero before the transform. If not given (or zero),
        filling starts at bin 1.
    trunc_method : {None, 'hann'}
        Function used for truncating the time-domain filter.
        None produces a hard truncation at `max_filter_len`. 'hann'
        additionally tapers the retained samples with the two halves of
        a Hann window of length `max_filter_len`.

    Returns
    -------
    psd : FrequencySeries
        PSD whose inverse spectrum has been truncated.

    Raises
    ------
    ValueError
        For invalid types or values of `max_filter_len` and
        `low_frequency_cutoff`.

    Notes
    -----
    See arXiv:gr-qc/0509116 for details.
    """
    from pycbc.strain.strain import execute_cached_fft, execute_cached_ifft

    # sanity checks
    if type(max_filter_len) is not int or max_filter_len <= 0:
        raise ValueError('max_filter_len must be a positive integer')

    if low_frequency_cutoff is not None:
        if low_frequency_cutoff < 0 \
                or low_frequency_cutoff > psd.sample_frequencies[-1]:
            raise ValueError('low_frequency_cutoff must be within the bandwidth of the PSD')

    num_freqs = len(psd)
    N = (num_freqs - 1) * 2
    last_bin = N // 2

    # Inverse ASD; the bins that are not assigned below stay at zero.
    inv_asd = FrequencySeries(zeros(num_freqs), delta_f=psd.delta_f,
                              dtype=complex_same_precision_as(psd))
    kmin = int(low_frequency_cutoff / psd.delta_f) if low_frequency_cutoff else 1
    inv_asd[kmin:last_bin] = (1.0 / psd[kmin:last_bin]) ** 0.5

    # Time-domain representation of the inverse ASD.
    if USE_CACHING_FOR_INV_SPEC_TRUNC:
        q = execute_cached_ifft(inv_asd, copy_output=False,
                                uid=INVSPECTRUNC_UNIQUE_ID)
    else:
        q = TimeSeries(
            numpy.zeros(N),
            delta_t=(N / psd.delta_f),
            dtype=real_same_precision_as(psd)
        )
        ifft(inv_asd, q)

    half_filter = max_filter_len // 2
    trunc_start = half_filter
    trunc_end = N - half_filter
    if trunc_end < trunc_start:
        raise ValueError('Invalid value in inverse_spectrum_truncation')

    if trunc_method == 'hann':
        # Taper the retained head with the falling half of the window and
        # the retained tail with the rising half.
        trunc_window = Array(numpy.hanning(max_filter_len), dtype=q.dtype)
        q[0:trunc_start] *= trunc_window[-trunc_start:]
        q[trunc_end:N] *= trunc_window[0:half_filter]

    # Zero everything between the retained head and tail.
    if trunc_start < trunc_end:
        q[trunc_start:trunc_end] = 0

    # Back to the frequency domain.
    if USE_CACHING_FOR_INV_SPEC_TRUNC:
        psd_trunc = execute_cached_fft(q, copy_output=False,
                                       uid=INVSPECTRUNC_UNIQUE_ID)
    else:
        psd_trunc = FrequencySeries(
            numpy.zeros(num_freqs),
            delta_f=psd.delta_f,
            dtype=complex_same_precision_as(psd)
        )
        fft(q, psd_trunc)

    # Squared magnitude of the truncated inverse ASD, then invert.
    psd_trunc *= psd_trunc.conj()
    return 1. / abs(psd_trunc)
[docs]
def interpolate(series, delta_f, length=None):
    """Linearly interpolate a frequency series onto a new frequency spacing.

    Parameters
    ----------
    series : FrequencySeries
        Frequency series to be interpolated.
    delta_f : float
        The desired delta_f of the output
    length : None or int
        The desired number of frequency samples. When None (the default)
        it is computed from the length and `delta_f` of `series` and the
        requested `delta_f`. That computed value can occasionally be
        inconsistent with what the caller expects, so an explicit
        `length` is used as-is when given.

    Returns
    -------
    interpolated series : FrequencySeries
        A new FrequencySeries that has been interpolated.
    """
    new_n = length
    if new_n is None:
        # Number of output bins spanning the same band as the input.
        new_n = (len(series) - 1) * series.delta_f / delta_f + 1

    # Frequencies at which the output is evaluated.
    target_freqs = numpy.arange(0, numpy.rint(new_n)) * delta_f
    source_freqs = series.sample_frequencies.numpy()
    values = numpy.interp(target_freqs, source_freqs, series.numpy())

    return FrequencySeries(values, epoch=series.epoch,
                           delta_f=delta_f, dtype=series.dtype)
[docs]
def bandlimited_interpolate(series, delta_f):
    """Interpolate a frequency series to a new delta_f by time-domain padding.

    The series is transformed to the time domain, the two halves of the
    result are copied to the start and end of a longer zero-filled buffer,
    and that buffer is transformed back to the frequency domain.

    Parameters
    ----------
    series : FrequencySeries
        Frequency series to be interpolated.
    delta_f : float
        The desired delta_f of the output

    Returns
    -------
    interpolated series : FrequencySeries
        A new FrequencySeries that has been interpolated.
    """
    # Work on a complex copy of the input.
    series = FrequencySeries(series, dtype=complex_same_precision_as(series),
                             delta_f=series.delta_f)

    N = (len(series) - 1) * 2
    half = N // 2
    delta_t = 1.0 / series.delta_f / N

    # NOTE(review): int() truncates, so a ratio that lands just below an
    # integer because of floating-point rounding loses one sample --
    # confirm whether rounding was intended.
    new_N = int(1.0 / (delta_t * delta_f))
    new_n = new_N // 2 + 1

    time_domain = TimeSeries(zeros(N), dtype=real_same_precision_as(series),
                             delta_t=delta_t)
    ifft(series, time_domain)

    # First half goes to the front of the padded buffer, second half to
    # the back; the samples in between stay zero.
    padded = TimeSeries(zeros(new_N), dtype=time_domain.dtype, delta_t=delta_t)
    padded[0:half] = time_domain[0:half]
    padded[new_N - half:new_N] = time_domain[half:N]

    interpolated_series = FrequencySeries(zeros(new_n), dtype=series.dtype,
                                          delta_f=delta_f)
    fft(padded, interpolated_series)
    return interpolated_series