Source code for pycbc.vetoes.sgchisq

"""Chisq based on sine-gaussian tiles.
See https://arxiv.org/abs/1709.08974 for a discussion.
"""

import numpy

from pycbc.waveform.utils import apply_fseries_time_shift
from pycbc.filter import sigma
from pycbc.waveform import sinegauss
from pycbc.vetoes.chisq import SingleDetPowerChisq
from pycbc.events import ranking

[docs] class SingleDetSGChisq(SingleDetPowerChisq): """Class that handles precomputation and memory management for efficiently running the sine-Gaussian chisq """ returns = {'sg_chisq': numpy.float32} def __init__(self, bank, num_bins=0, snr_threshold=None, chisq_locations=None): """ Create sine-Gaussian Chisq Calculator Parameters ---------- bank: pycbc.waveform.TemplateBank The template bank that will be processed. num_bins: str The string determining the number of power chisq bins snr_threshold: float The threshold to calculate the sine-Gaussian chisq chisq_locations: list of strs List of strings which detail where to place a sine-Gaussian. The format is 'region-boolean:q1-offset1,q2-offset2'. The offset is relative to the end frequency of the approximant. The region is a boolean expression such as 'mtotal>40' indicating which templates to apply this set of sine-Gaussians to. """ if snr_threshold is not None: self.do = True self.num_bins = num_bins self.snr_threshold = snr_threshold self.params = {} for descr in chisq_locations: region, values = descr.split(":") mask = bank.table.parse_boolargs([(1, region), (0, 'else')])[0] hashes = bank.table['template_hash'][mask.astype(bool)] for h in hashes: self.params[h] = values else: self.do = False
[docs] @staticmethod def insert_option_group(parser): group = parser.add_argument_group("Sine-Gaussian Chisq") group.add_argument("--sgchisq-snr-threshold", type=float, help="Minimum SNR threshold to use SG chisq") group.add_argument("--sgchisq-locations", type=str, nargs='+', help="Frequency offsets and quality factors of the sine-Gaussians" " to use, format 'region-boolean:q1-offset1,q2-offset2'. " "Offset is relative to the end frequency of the approximant." " Region is a boolean expression selecting templates to " "apply the sine-Gaussians to, ex. 'mtotal>40'")
[docs] @classmethod def from_cli(cls, args, bank, chisq_bins): return cls(bank, chisq_bins, args.sgchisq_snr_threshold, args.sgchisq_locations)
[docs] def values(self, stilde, template, psd, snrv, snr_norm, bchisq, bchisq_dof, indices): """ Calculate sine-Gaussian chisq Parameters ---------- stilde: pycbc.types.Frequencyseries The overwhitened strain template: pycbc.types.Frequencyseries The waveform template being analyzed psd: pycbc.types.Frequencyseries The power spectral density of the data snrv: numpy.ndarray The peak unnormalized complex SNR values snr_norm: float The normalization factor for the snr bchisq: numpy.ndarray The Bruce Allen power chisq values for these triggers bchisq_dof: numpy.ndarray The degrees of freedom of the Bruce chisq indics: numpy.ndarray The indices of the snr peaks. Returns ------- chisq: Array Chisq values, one for each sample index """ if not self.do: return None if template.params.template_hash not in self.params: return numpy.ones(len(snrv)) values = self.params[template.params.template_hash].split(',') # Get the chisq bins to use as the frequency reference point bins = self.cached_chisq_bins(template, psd) # This is implemented slowly, so let's not call it often, OK? chisq = numpy.ones(len(snrv)) for i, snrvi in enumerate(snrv): #Skip if newsnr too low snr = abs(snrvi * snr_norm) nsnr = ranking.newsnr(snr, bchisq[i] / bchisq_dof[i]) if nsnr < self.snr_threshold: continue N = (len(template) - 1) * 2 dt = 1.0 / (N * template.delta_f) kmin = int(template.f_lower / psd.delta_f) time = float(template.epoch) + dt * indices[i] # Shift the time of interest to be centered on 0 stilde_shift = apply_fseries_time_shift(stilde, -time) # Only apply the sine-Gaussian in a +-50 Hz range around the # central frequency qwindow = 50 chisq[i] = 0 # Estimate the maximum frequency up to which the waveform has # power by approximating power per frequency # as constant over the last 2 chisq bins. We cannot use the final # chisq bin edge as it does not have to be where the waveform # terminates. fstep = (bins[-2] - bins[-3]) fpeak = (bins[-2] + fstep) * template.delta_f # This is 90% of the Nyquist frequency of the data # This allows us to avoid issues near Nyquist due to resample # Filtering fstop = len(stilde) * stilde.delta_f * 0.9 dof = 0 # Calculate the sum of SNR^2 for the sine-Gaussians specified for descr in values: # Get the q and frequency offset from the descriptor q, offset = descr.split('-') q, offset = float(q), float(offset) fcen = fpeak + offset flow = max(kmin * template.delta_f, fcen - qwindow) fhigh = fcen + qwindow # If any sine-gaussian tile has an upper frequency near # nyquist return 1 instead. if fhigh > fstop: return numpy.ones(len(snrv)) kmin = int(flow / template.delta_f) kmax = int(fhigh / template.delta_f) #Calculate sine-gaussian tile gtem = sinegauss.fd_sine_gaussian(1.0, q, fcen, flow, len(template) * template.delta_f, template.delta_f).astype(numpy.complex64) gsigma = sigma(gtem, psd=psd, low_frequency_cutoff=flow, high_frequency_cutoff=fhigh) #Calculate the SNR of the tile gsnr = (gtem[kmin:kmax] * stilde_shift[kmin:kmax]).sum() gsnr *= 4.0 * gtem.delta_f / gsigma chisq[i] += abs(gsnr)**2.0 dof += 2 if dof == 0: chisq[i] = 1 else: chisq[i] /= dof return chisq