# Copyright (C) 2013 Ian W. Harry
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import copy
import numpy
import logging
from pycbc.tmpltbank import coord_utils
logger = logging.getLogger('pycbc.tmpltbank.partitioned_bank')
[docs]
class PartitionedTmpltbank(object):
"""
This class is used to hold a template bank partitioned into numerous bins
based on position in the Cartesian parameter space where the axes are the
principal components. It can also be used to hold intermediary
products used while constructing (e.g.) a stochastic template bank.
"""
def __init__(self, mass_range_params, metric_params, ref_freq,
bin_spacing, bin_range_check=1):
"""
Set up the partitioned template bank class. The combination of the
reference frequency, the bin spacing and the metric dictates how the
parameter space will be partitioned.
Parameters
-----------
mass_range_params : massRangeParameters object
An initialized massRangeParameters object holding the details of
the mass and spin ranges being considered.
metric_params : metricParameters object
An initialized metricParameters object holding the details of the
parameter space metric that is being used.
ref_freq : float
The reference frequency to use as the upper frequency cutoff of
the metric when partitioning the bank. In general this would be
set to the *smallest* upper frequency cutoff that is possible in
the given parameter space. However, in some cases this can lead
to only a small number of partitions and the computational cost
will increase dramatically. NOTE: when using the vary-fupper
option this upper frequency cutoff is only used to determine which
points should be matched against each other, it is *not* used in
the actual metric-based calculation of the distance (which uses the
frequency cutoffs of the points being considered).
bin_spacing : float
The metric distance to space the bins by. NOTE: If you want to
place the bins to have a width corresponding to a minimal match of
0.97 you would set this to :math:`(1 - 0.97)^{0.5}`.
Note the square root,
matches correspond to the square of parameter space distance.
bin_range_check : int
When computing matches consider points in the corresponding bin and
all bins +/- this value in both chi_1 and chi_2 directions.
DEFAULT = 1.
"""
# Flags to be used in other methods of this class. Initialized here for
# simplicity
self.spin_warning_given = False
# These will probably be used a lot, so add to object
self.mass_range_params = mass_range_params
self.metric_params = metric_params
self.ref_freq = ref_freq
self.bin_spacing = bin_spacing
# Get parameter space extent
vals = coord_utils.estimate_mass_range(1000000, mass_range_params,
metric_params, ref_freq, covary=True)
chi1_max = vals[0].max()
chi1_min = vals[0].min()
chi1_diff = chi1_max - chi1_min
chi2_max = vals[1].max()
chi2_min = vals[1].min()
chi2_diff = chi2_max - chi2_min
# Add a little bit extra as we may not have reached the edges.
# FIXME: Maybe better to use the numerical code to find maxima here?
chi1_min = chi1_min - 0.1*chi1_diff
chi1_max = chi1_max + 0.1*chi1_diff
chi2_min = chi2_min - 0.1*chi2_diff
chi2_max = chi2_max + 0.1*chi2_diff
massbank = {}
bank = {}
# Also add a little bit here
for i in range(-2, int((chi1_max - chi1_min) // bin_spacing + 2)):
bank[i] = {}
massbank[i] = {}
for j in range(-2, int((chi2_max - chi2_min) // bin_spacing + 2)):
bank[i][j] = []
massbank[i][j] = {}
massbank[i][j]['mass1s'] = numpy.array([])
self.massbank = massbank
self.bank = bank
# Record minimum and maximum bins
self.min_chi1_bin = -2
self.min_chi2_bin = -2
self.max_chi1_bin = int((chi1_max - chi1_min) // bin_spacing + 1)
self.max_chi2_bin = int((chi2_max - chi2_min) // bin_spacing + 1)
self.chi1_min = chi1_min
self.chi1_max = chi1_max
self.chi2_min = chi2_min
self.chi2_max = chi2_max
# How many adjacent bins should we check?
self.bin_range_check = 1
self.bin_loop_order = coord_utils.outspiral_loop(self.bin_range_check)
[docs]
def get_point_from_bins_and_idx(self, chi1_bin, chi2_bin, idx):
"""Find masses and spins given bin numbers and index.
Given the chi1 bin, chi2 bin and an index, return the masses and spins
of the point at that index. Will fail if no point exists there.
Parameters
-----------
chi1_bin : int
The bin number for chi1.
chi2_bin : int
The bin number for chi2.
idx : int
The index within the chi1, chi2 bin.
Returns
--------
mass1 : float
Mass of heavier body.
mass2 : float
Mass of lighter body.
spin1z : float
Spin of heavier body.
spin2z : float
Spin of lighter body.
"""
mass1 = self.massbank[chi1_bin][chi2_bin]['mass1s'][idx]
mass2 = self.massbank[chi1_bin][chi2_bin]['mass2s'][idx]
spin1z = self.massbank[chi1_bin][chi2_bin]['spin1s'][idx]
spin2z = self.massbank[chi1_bin][chi2_bin]['spin2s'][idx]
return mass1, mass2, spin1z, spin2z
[docs]
def get_freq_map_and_normalizations(self, frequency_list,
upper_freq_formula):
"""
If using the --vary-fupper capability we need to store the mapping
between index and frequencies in the list. We also precalculate the
normalization factor at every frequency, which is used when estimating
overlaps to account for abrupt changes in termination frequency.
Parameters
-----------
frequency_list : array of floats
The frequencies for which the metric has been computed and lie
within the parameter space being considered.
upper_freq_formula : string
"""
self.frequency_map = {}
self.normalization_map = {}
self.upper_freq_formula = upper_freq_formula
# FIXME: Must this be sorted on input
frequency_list.sort()
for idx, frequency in enumerate(frequency_list):
self.frequency_map[frequency] = idx
self.normalization_map[frequency] = \
(self.metric_params.moments['I7'][frequency])**0.5
[docs]
def find_point_bin(self, chi_coords):
"""
Given a set of coordinates in the chi parameter space, identify the
indices of the chi1 and chi2 bins that the point occurs in. Returns
these indices.
Parameters
-----------
chi_coords : numpy.array
The position of the point in the chi coordinates.
Returns
--------
chi1_bin : int
Index of the chi_1 bin.
chi2_bin : int
Index of the chi_2 bin.
"""
# Identify bin
chi1_bin = int((chi_coords[0] - self.chi1_min) // self.bin_spacing)
chi2_bin = int((chi_coords[1] - self.chi2_min) // self.bin_spacing)
self.check_bin_existence(chi1_bin, chi2_bin)
return chi1_bin, chi2_bin
[docs]
def check_bin_existence(self, chi1_bin, chi2_bin):
"""
Given indices for bins in chi1 and chi2 space check that the bin
exists in the object. If not add it. Also check for the existence of
all bins within +/- self.bin_range_check and add if not present.
Parameters
-----------
chi1_bin : int
The index of the chi1_bin to check
chi2_bin : int
The index of the chi2_bin to check
"""
bin_range_check = self.bin_range_check
# Check if this bin actually exists. If not add it
if ( (chi1_bin < self.min_chi1_bin+bin_range_check) or
(chi1_bin > self.max_chi1_bin-bin_range_check) or
(chi2_bin < self.min_chi2_bin+bin_range_check) or
(chi2_bin > self.max_chi2_bin-bin_range_check) ):
for temp_chi1 in range(chi1_bin-bin_range_check,
chi1_bin+bin_range_check+1):
if temp_chi1 not in self.massbank:
self.massbank[temp_chi1] = {}
self.bank[temp_chi1] = {}
for temp_chi2 in range(chi2_bin-bin_range_check,
chi2_bin+bin_range_check+1):
if temp_chi2 not in self.massbank[temp_chi1]:
self.massbank[temp_chi1][temp_chi2] = {}
self.massbank[temp_chi1][temp_chi2]['mass1s'] =\
numpy.array([])
self.bank[temp_chi1][temp_chi2] = []
[docs]
def calc_point_distance(self, chi_coords):
"""
Calculate distance between point and the bank. Return the closest
distance.
Parameters
-----------
chi_coords : numpy.array
The position of the point in the chi coordinates.
Returns
--------
min_dist : float
The smallest **SQUARED** metric distance between the test point and
the bank.
indexes : The chi1_bin, chi2_bin and position within that bin at which
the closest matching point lies.
"""
chi1_bin, chi2_bin = self.find_point_bin(chi_coords)
min_dist = 1000000000
indexes = None
for chi1_bin_offset, chi2_bin_offset in self.bin_loop_order:
curr_chi1_bin = chi1_bin + chi1_bin_offset
curr_chi2_bin = chi2_bin + chi2_bin_offset
for idx, bank_chis in \
enumerate(self.bank[curr_chi1_bin][curr_chi2_bin]):
dist = coord_utils.calc_point_dist(chi_coords, bank_chis)
if dist < min_dist:
min_dist = dist
indexes = (curr_chi1_bin, curr_chi2_bin, idx)
return min_dist, indexes
[docs]
def test_point_distance(self, chi_coords, distance_threshold):
"""
Test if the distance between the supplied point and the bank is less
than the supplied distance theshold.
Parameters
-----------
chi_coords : numpy.array
The position of the point in the chi coordinates.
distance_threshold : float
The **SQUARE ROOT** of the metric distance to test as threshold.
E.g. if you want to test to a minimal match of 0.97 you would
use 1 - 0.97 = 0.03 for this value.
Returns
--------
Boolean
True if point is within the distance threshold. False if not.
"""
chi1_bin, chi2_bin = self.find_point_bin(chi_coords)
for chi1_bin_offset, chi2_bin_offset in self.bin_loop_order:
curr_chi1_bin = chi1_bin + chi1_bin_offset
curr_chi2_bin = chi2_bin + chi2_bin_offset
for bank_chis in self.bank[curr_chi1_bin][curr_chi2_bin]:
dist = coord_utils.calc_point_dist(chi_coords, bank_chis)
if dist < distance_threshold:
return True
else:
return False
[docs]
def calc_point_distance_vary(self, chi_coords, point_fupper, mus):
"""
Calculate distance between point and the bank allowing the metric to
vary based on varying upper frequency cutoff. Slower than
calc_point_distance, but more reliable when upper frequency cutoff can
change a lot.
Parameters
-----------
chi_coords : numpy.array
The position of the point in the chi coordinates.
point_fupper : float
The upper frequency cutoff to use for this point. This value must
be one of the ones already calculated in the metric.
mus : numpy.array
A 2D array where idx 0 holds the upper frequency cutoff and idx 1
holds the coordinates in the [not covaried] mu parameter space for
each value of the upper frequency cutoff.
Returns
--------
min_dist : float
The smallest **SQUARED** metric distance between the test point and
the bank.
indexes : The chi1_bin, chi2_bin and position within that bin at which
the closest matching point lies.
"""
chi1_bin, chi2_bin = self.find_point_bin(chi_coords)
min_dist = 1000000000
indexes = None
for chi1_bin_offset, chi2_bin_offset in self.bin_loop_order:
curr_chi1_bin = chi1_bin + chi1_bin_offset
curr_chi2_bin = chi2_bin + chi2_bin_offset
# No points = Next iteration
curr_bank = self.massbank[curr_chi1_bin][curr_chi2_bin]
if not curr_bank['mass1s'].size:
continue
# *NOT* the same of .min and .max
f_upper = numpy.minimum(point_fupper, curr_bank['freqcuts'])
f_other = numpy.maximum(point_fupper, curr_bank['freqcuts'])
# NOTE: freq_idxes is a vector!
freq_idxes = numpy.array([self.frequency_map[f] for f in f_upper])
# vecs1 gives a 2x2 vector: idx0 = stored index, idx1 = mu index
vecs1 = mus[freq_idxes, :]
# vecs2 gives a 2x2 vector: idx0 = stored index, idx1 = mu index
range_idxes = numpy.arange(len(freq_idxes))
vecs2 = curr_bank['mus'][range_idxes, freq_idxes, :]
# Now do the sums
dists = (vecs1 - vecs2)*(vecs1 - vecs2)
# This reduces to 1D: idx = stored index
dists = numpy.sum(dists, axis=1)
norm_upper = numpy.array([self.normalization_map[f] \
for f in f_upper])
norm_other = numpy.array([self.normalization_map[f] \
for f in f_other])
norm_fac = norm_upper / norm_other
renormed_dists = 1 - (1 - dists)*norm_fac
curr_min_dist = renormed_dists.min()
if curr_min_dist < min_dist:
min_dist = curr_min_dist
indexes = curr_chi1_bin, curr_chi2_bin, renormed_dists.argmin()
return min_dist, indexes
[docs]
def test_point_distance_vary(self, chi_coords, point_fupper, mus,
distance_threshold):
"""
Test if distance between point and the bank is greater than distance
threshold while allowing the metric to
vary based on varying upper frequency cutoff. Slower than
test_point_distance, but more reliable when upper frequency cutoff can
change a lot.
Parameters
-----------
chi_coords : numpy.array
The position of the point in the chi coordinates.
point_fupper : float
The upper frequency cutoff to use for this point. This value must
be one of the ones already calculated in the metric.
mus : numpy.array
A 2D array where idx 0 holds the upper frequency cutoff and idx 1
holds the coordinates in the [not covaried] mu parameter space for
each value of the upper frequency cutoff.
distance_threshold : float
The **SQUARE ROOT** of the metric distance to test as threshold.
E.g. if you want to test to a minimal match of 0.97 you would
use 1 - 0.97 = 0.03 for this value.
Returns
--------
Boolean
True if point is within the distance threshold. False if not.
"""
chi1_bin, chi2_bin = self.find_point_bin(chi_coords)
for chi1_bin_offset, chi2_bin_offset in self.bin_loop_order:
curr_chi1_bin = chi1_bin + chi1_bin_offset
curr_chi2_bin = chi2_bin + chi2_bin_offset
# No points = Next iteration
curr_bank = self.massbank[curr_chi1_bin][curr_chi2_bin]
if not curr_bank['mass1s'].size:
continue
# *NOT* the same of .min and .max
f_upper = numpy.minimum(point_fupper, curr_bank['freqcuts'])
f_other = numpy.maximum(point_fupper, curr_bank['freqcuts'])
# NOTE: freq_idxes is a vector!
freq_idxes = numpy.array([self.frequency_map[f] for f in f_upper])
# vecs1 gives a 2x2 vector: idx0 = stored index, idx1 = mu index
vecs1 = mus[freq_idxes, :]
# vecs2 gives a 2x2 vector: idx0 = stored index, idx1 = mu index
range_idxes = numpy.arange(len(freq_idxes))
vecs2 = curr_bank['mus'][range_idxes,freq_idxes,:]
# Now do the sums
dists = (vecs1 - vecs2)*(vecs1 - vecs2)
# This reduces to 1D: idx = stored index
dists = numpy.sum(dists, axis=1)
# I wonder if this line actually speeds things up?
if (dists > distance_threshold).all():
continue
# This is only needed for close templates, should we prune?
norm_upper = numpy.array([self.normalization_map[f] \
for f in f_upper])
norm_other = numpy.array([self.normalization_map[f] \
for f in f_other])
norm_fac = norm_upper / norm_other
renormed_dists = 1 - (1 - dists)*norm_fac
if (renormed_dists < distance_threshold).any():
return True
else:
return False
[docs]
def add_point_by_chi_coords(self, chi_coords, mass1, mass2, spin1z, spin2z,
point_fupper=None, mus=None):
"""
Add a point to the partitioned template bank. The point_fupper and mus
kwargs must be provided for all templates if the vary fupper capability
is desired. This requires that the chi_coords, as well as mus and
point_fupper if needed, to be precalculated. If you just have the
masses and don't want to worry about translations see
add_point_by_masses, which will do translations and then call this.
Parameters
-----------
chi_coords : numpy.array
The position of the point in the chi coordinates.
mass1 : float
The heavier mass of the point to add.
mass2 : float
The lighter mass of the point to add.
spin1z: float
The [aligned] spin on the heavier body.
spin2z: float
The [aligned] spin on the lighter body.
The upper frequency cutoff to use for this point. This value must
be one of the ones already calculated in the metric.
mus : numpy.array
A 2D array where idx 0 holds the upper frequency cutoff and idx 1
holds the coordinates in the [not covaried] mu parameter space for
each value of the upper frequency cutoff.
"""
chi1_bin, chi2_bin = self.find_point_bin(chi_coords)
self.bank[chi1_bin][chi2_bin].append(copy.deepcopy(chi_coords))
curr_bank = self.massbank[chi1_bin][chi2_bin]
if curr_bank['mass1s'].size:
curr_bank['mass1s'] = numpy.append(curr_bank['mass1s'],
numpy.array([mass1]))
curr_bank['mass2s'] = numpy.append(curr_bank['mass2s'],
numpy.array([mass2]))
curr_bank['spin1s'] = numpy.append(curr_bank['spin1s'],
numpy.array([spin1z]))
curr_bank['spin2s'] = numpy.append(curr_bank['spin2s'],
numpy.array([spin2z]))
if point_fupper is not None:
curr_bank['freqcuts'] = numpy.append(curr_bank['freqcuts'],
numpy.array([point_fupper]))
# Mus needs to append onto axis 0. See below for contents of
# the mus variable
if mus is not None:
curr_bank['mus'] = numpy.append(curr_bank['mus'],
numpy.array([mus[:,:]]), axis=0)
else:
curr_bank['mass1s'] = numpy.array([mass1])
curr_bank['mass2s'] = numpy.array([mass2])
curr_bank['spin1s'] = numpy.array([spin1z])
curr_bank['spin2s'] = numpy.array([spin2z])
if point_fupper is not None:
curr_bank['freqcuts'] = numpy.array([point_fupper])
# curr_bank['mus'] is a 3D array
# NOTE: mu relates to the non-covaried Cartesian coordinate system
# Axis 0: Template index
# Axis 1: Frequency cutoff index
# Axis 2: Mu coordinate index
if mus is not None:
curr_bank['mus'] = numpy.array([mus[:,:]])
[docs]
def add_point_by_masses(self, mass1, mass2, spin1z, spin2z,
vary_fupper=False):
"""
Add a point to the template bank. This differs from add point to bank
as it assumes that the chi coordinates and the products needed to use
vary_fupper have not already been calculated. This function calculates
these products and then calls add_point_by_chi_coords.
This function also
carries out a number of sanity checks (eg. is the point within the
ranges given by mass_range_params) that add_point_by_chi_coords does
not do for speed concerns.
Parameters
-----------
mass1 : float
Mass of the heavier body
mass2 : float
Mass of the lighter body
spin1z : float
Spin of the heavier body
spin2z : float
Spin of the lighter body
"""
# Test that masses are the expected way around (ie. mass1 > mass2)
if mass2 > mass1:
if not self.spin_warning_given:
warn_msg = "Am adding a template where mass2 > mass1. The "
warn_msg += "convention is that mass1 > mass2. Swapping mass1 "
warn_msg += "and mass2 and adding point to bank. This message "
warn_msg += "will not be repeated."
logger.warning(warn_msg)
self.spin_warning_given = True
# These that masses obey the restrictions of mass_range_params
if self.mass_range_params.is_outside_range(mass1, mass2, spin1z,
spin2z):
err_msg = "Point with masses given by "
err_msg += "%f %f %f %f " %(mass1, mass2, spin1z, spin2z)
err_msg += "(mass1, mass2, spin1z, spin2z) is not consistent "
err_msg += "with the provided command-line restrictions on masses "
err_msg += "and spins."
raise ValueError(err_msg)
# Get chi coordinates
chi_coords = coord_utils.get_cov_params(mass1, mass2, spin1z, spin2z,
self.metric_params,
self.ref_freq)
# Get mus and best fupper for this point, if needed
if vary_fupper:
mass_dict = {}
mass_dict['m1'] = numpy.array([mass1])
mass_dict['m2'] = numpy.array([mass2])
mass_dict['s1z'] = numpy.array([spin1z])
mass_dict['s2z'] = numpy.array([spin2z])
freqs = numpy.array(list(self.frequency_map.keys()), dtype=float)
freq_cutoff = coord_utils.return_nearest_cutoff(\
self.upper_freq_formula, mass_dict, freqs)
freq_cutoff = freq_cutoff[0]
lambdas = coord_utils.get_chirp_params\
(mass1, mass2, spin1z, spin2z, self.metric_params.f0,
self.metric_params.pnOrder)
mus = []
for freq in self.frequency_map:
mus.append(coord_utils.get_mu_params(lambdas,
self.metric_params, freq) )
mus = numpy.array(mus)
else:
freq_cutoff=None
mus=None
self.add_point_by_chi_coords(chi_coords, mass1, mass2, spin1z, spin2z,
point_fupper=freq_cutoff, mus=mus)
[docs]
def add_tmpltbank_from_xml_table(self, sngl_table, vary_fupper=False):
"""
This function will take a sngl_inspiral_table of templates and add them
into the partitioned template bank object.
Parameters
-----------
sngl_table : sngl_inspiral_table
List of sngl_inspiral templates.
vary_fupper : False
If given also include the additional information needed to compute
distances with a varying upper frequency cutoff.
"""
for sngl in sngl_table:
self.add_point_by_masses(sngl.mass1, sngl.mass2, sngl.spin1z,
sngl.spin2z, vary_fupper=vary_fupper)
[docs]
def add_tmpltbank_from_hdf_file(self, hdf_fp, vary_fupper=False):
"""
This function will take a pointer to an open HDF File object containing
a list of templates and add them into the partitioned template bank
object.
Parameters
-----------
hdf_fp : h5py.File object
The template bank in HDF5 format.
vary_fupper : False
If given also include the additional information needed to compute
distances with a varying upper frequency cutoff.
"""
mass1s = hdf_fp['mass1'][:]
mass2s = hdf_fp['mass2'][:]
spin1zs = hdf_fp['spin1z'][:]
spin2zs = hdf_fp['spin2z'][:]
for idx in range(len(mass1s)):
self.add_point_by_masses(mass1s[idx], mass2s[idx], spin1zs[idx],
spin2zs[idx], vary_fupper=vary_fupper)
[docs]
def output_all_points(self):
"""Return all points in the bank.
Return all points in the bank as lists of m1, m2, spin1z, spin2z.
Returns
-------
mass1 : list
List of mass1 values.
mass2 : list
List of mass2 values.
spin1z : list
List of spin1z values.
spin2z : list
List of spin2z values.
"""
mass1 = []
mass2 = []
spin1z = []
spin2z = []
for i in self.massbank.keys():
for j in self.massbank[i].keys():
for k in range(len(self.massbank[i][j]['mass1s'])):
curr_bank = self.massbank[i][j]
mass1.append(curr_bank['mass1s'][k])
mass2.append(curr_bank['mass2s'][k])
spin1z.append(curr_bank['spin1s'][k])
spin2z.append(curr_bank['spin2s'][k])
return mass1, mass2, spin1z, spin2z