from bisect import bisect_right
try:
from fpconst import PosInf, NegInf
except ImportError:
# fpconst is not part of the standard library and might not be available
PosInf = float("+inf")
NegInf = float("-inf")
import numpy
import math
import logging
logger = logging.getLogger('pycbc.bin_utils')
[docs]
class Bins(object):
"""
Parent class for 1-dimensional binnings.
Not intended to be used directly, but to be subclassed for use in real
bins classes.
"""
def __init__(self, minv, maxv, n):
"""
Initialize a Bins instance. The three arguments are the
minimum and maximum of the values spanned by the bins, and
the number of bins to place between them. Subclasses may
require additional arguments, or different arguments
altogether.
"""
# convenience code to do some common initialization and
# input checking
if not isinstance(n, int):
raise TypeError(n)
if n < 1:
raise ValueError(n)
if maxv <= minv:
raise ValueError((minv, maxv))
self.minv = minv
self.maxv = maxv
self.n = n
def __len__(self):
return self.n
def __getitem__(self, x):
"""
Convert a co-ordinate to a bin index. The co-ordinate can
be a single value, or a Python slice instance describing a
range of values. If a single value is given, it is mapped
to the bin index corresponding to that value. If a slice
is given, it is converted to a slice whose lower bound is
the index of the bin in which the slice's lower bound
falls, and whose upper bound is 1 greater than the index of
the bin in which the slice's upper bound falls. Steps are
not supported in slices.
"""
if isinstance(x, slice):
if x.step is not None:
raise NotImplementedError("step not supported: %s" % repr(x))
return slice(self[x.start] if x.start is not None
else 0, self[x.stop] + 1 if x.stop is not None
else len(self))
raise NotImplementedError
def __iter__(self):
"""
If __iter__ does not exist, Python uses __getitem__ with
range(0) as input to define iteration. This is nonsensical
for bin objects, so explicitly unsupport iteration.
"""
raise NotImplementedError
[docs]
def lower(self):
"""
Return an array containing the locations of the lower
boundaries of the bins.
"""
raise NotImplementedError
[docs]
def centres(self):
"""
Return an array containing the locations of the bin
centres.
"""
raise NotImplementedError
[docs]
def upper(self):
"""
Return an array containing the locations of the upper
boundaries of the bins.
"""
raise NotImplementedError
[docs]
class IrregularBins(Bins):
"""
Bins with arbitrary, irregular spacing. We only require strict
monotonicity of the bin boundaries. N boundaries define N-1 bins.
Example:
>>> x = IrregularBins([0.0, 11.0, 15.0, numpy.inf])
>>> len(x)
3
>>> x[1]
0
>>> x[1.5]
0
>>> x[13]
1
>>> x[25]
2
>>> x[4:17]
slice(0, 3, None)
>>> IrregularBins([0.0, 15.0, 11.0])
Traceback (most recent call last):
...
ValueError: non-monotonic boundaries provided
>>> y = IrregularBins([0.0, 11.0, 15.0, numpy.inf])
>>> x == y
True
"""
def __init__(self, boundaries):
"""
Initialize a set of custom bins with the bin boundaries.
This includes all left edges plus the right edge. The
boundaries must be monotonic and there must be at least two
elements.
"""
# check pre-conditions
if len(boundaries) < 2:
raise ValueError("less than two boundaries provided")
boundaries = tuple(boundaries)
if any(a > b for a, b in zip(boundaries[:-1], boundaries[1:])):
raise ValueError("non-monotonic boundaries provided")
self.boundaries = boundaries
self.n = len(boundaries) - 1
self.minv = boundaries[0]
self.maxv = boundaries[-1]
def __getitem__(self, x):
if isinstance(x, slice):
return super(IrregularBins, self).__getitem__(x)
if self.minv <= x < self.maxv:
return bisect_right(self.boundaries, x) - 1
# special measure-zero edge case
if x == self.maxv:
return len(self.boundaries) - 2
raise IndexError(x)
[docs]
def lower(self):
return numpy.array(self.boundaries[:-1])
[docs]
def upper(self):
return numpy.array(self.boundaries[1:])
[docs]
def centres(self):
return (self.lower() + self.upper()) / 2.0
[docs]
class LinearBins(Bins):
"""
Linearly-spaced bins. There are n bins of equal size, the first
bin starts on the lower bound and the last bin ends on the upper
bound inclusively.
Example:
>>> x = LinearBins(1.0, 25.0, 3)
>>> x.lower()
array([ 1., 9., 17.])
>>> x.upper()
array([ 9., 17., 25.])
>>> x.centres()
array([ 5., 13., 21.])
>>> x[1]
0
>>> x[1.5]
0
>>> x[10]
1
>>> x[25]
2
>>> x[0:27]
Traceback (most recent call last):
...
IndexError: 0
>>> x[1:25]
slice(0, 3, None)
>>> x[:25]
slice(0, 3, None)
>>> x[10:16.9]
slice(1, 2, None)
>>> x[10:17]
slice(1, 3, None)
>>> x[10:]
slice(1, 3, None)
"""
def __init__(self, minv, maxv, n):
super(LinearBins, self).__init__(minv, maxv, n)
self.delta = float(maxv - minv) / n
def __getitem__(self, x):
if isinstance(x, slice):
return super(LinearBins, self).__getitem__(x)
if self.minv <= x < self.maxv:
return int(math.floor((x - self.minv) / self.delta))
if x == self.maxv:
# special "measure zero" corner case
return len(self) - 1
raise IndexError(x)
[docs]
def lower(self):
return numpy.linspace(self.minv, self.maxv - self.delta, len(self))
[docs]
def centres(self):
return numpy.linspace(self.minv + self.delta / 2.,
self.maxv - self.delta / 2., len(self))
[docs]
def upper(self):
return numpy.linspace(self.minv + self.delta, self.maxv, len(self))
[docs]
class LinearPlusOverflowBins(Bins):
"""
Linearly-spaced bins with overflow at the edges.
There are n-2 bins of equal size. The bin 1 starts on the lower bound and
bin n-2 ends on the upper bound. Bins 0 and n-1 are overflow going from
-infinity to the lower bound and from the upper bound to +infinity
respectively. Must have n >= 3.
Example:
>>> x = LinearPlusOverflowBins(1.0, 25.0, 5)
>>> x.centres()
array([-inf, 5., 13., 21., inf])
>>> x.lower()
array([-inf, 1., 9., 17., 25.])
>>> x.upper()
array([ 1., 9., 17., 25., inf])
>>> x[float("-inf")]
0
>>> x[0]
0
>>> x[1]
1
>>> x[10]
2
>>> x[24.99999999]
3
>>> x[25]
4
>>> x[100]
4
>>> x[float("+inf")]
4
>>> x[float("-inf"):9]
slice(0, 3, None)
>>> x[9:float("+inf")]
slice(2, 5, None)
"""
def __init__(self, minv, maxv, n):
if n < 3:
raise ValueError("n must be >= 3")
super(LinearPlusOverflowBins, self).__init__(minv, maxv, n)
self.delta = float(maxv - minv) / (n - 2)
def __getitem__(self, x):
if isinstance(x, slice):
return super(LinearPlusOverflowBins, self).__getitem__(x)
if self.minv <= x < self.maxv:
return int(math.floor((x - self.minv) / self.delta)) + 1
if x >= self.maxv:
# +infinity overflow bin
return len(self) - 1
if x < self.minv:
# -infinity overflow bin
return 0
raise IndexError(x)
[docs]
def lower(self):
return numpy.concatenate(
(numpy.array([NegInf]),
self.minv + self.delta * numpy.arange(len(self) - 2),
numpy.array([self.maxv]))
)
[docs]
def centres(self):
return numpy.concatenate(
(numpy.array([NegInf]),
self.minv + self.delta * (numpy.arange(len(self) - 2) + 0.5),
numpy.array([PosInf]))
)
[docs]
def upper(self):
return numpy.concatenate(
(numpy.array([self.minv]),
self.minv + self.delta * (numpy.arange(len(self) - 2) + 1),
numpy.array([PosInf]))
)
[docs]
class LogarithmicBins(Bins):
"""
Logarithmically-spaced bins.
There are n bins, each of whose upper and lower bounds differ by the same
factor. The first bin starts on the lower bound, and the last bin ends on
the upper bound inclusively.
Example:
>>> x = LogarithmicBins(1.0, 25.0, 3)
>>> x[1]
0
>>> x[5]
1
>>> x[25]
2
"""
def __init__(self, minv, maxv, n):
super(LogarithmicBins, self).__init__(minv, maxv, n)
self.delta = (math.log(maxv) - math.log(minv)) / n
def __getitem__(self, x):
if isinstance(x, slice):
return super(LogarithmicBins, self).__getitem__(x)
if self.minv <= x < self.maxv:
return int(math.floor((math.log(x) - math.log(self.minv)) /
self.delta))
if x == self.maxv:
# special "measure zero" corner case
return len(self) - 1
raise IndexError(x)
[docs]
def lower(self):
return numpy.exp(
numpy.linspace(math.log(self.minv), math.log(self.maxv) -
self.delta, len(self))
)
[docs]
def centres(self):
return numpy.exp(
numpy.linspace(math.log(self.minv), math.log(self.maxv) -
self.delta, len(self)) + self.delta / 2.
)
[docs]
def upper(self):
return numpy.exp(
numpy.linspace(math.log(self.minv) + self.delta,
math.log(self.maxv), len(self))
)
[docs]
class LogarithmicPlusOverflowBins(Bins):
"""
Logarithmically-spaced bins plus one bin at each end that goes to
zero and positive infinity respectively. There are n-2 bins each
of whose upper and lower bounds differ by the same factor. Bin 1
starts on the lower bound, and bin n-2 ends on the upper bound
inclusively. Bins 0 and n-1 are overflow bins extending from 0 to
the lower bound and from the upper bound to +infinity respectively.
Must have n >= 3.
Example:
>>> x = LogarithmicPlusOverflowBins(1.0, 25.0, 5)
>>> x[0]
0
>>> x[1]
1
>>> x[5]
2
>>> x[24.999]
3
>>> x[25]
4
>>> x[100]
4
>>> x.lower()
array([ 0. , 1. , 2.92401774, 8.54987973, 25. ])
>>> x.upper()
array([ 1. , 2.92401774, 8.54987973, 25. , inf])
>>> x.centres()
array([ 0. , 1.70997595, 5. , 14.62008869, inf])
"""
def __init__(self, minv, maxv, n):
if n < 3:
raise ValueError("n must be >= 3")
super(LogarithmicPlusOverflowBins, self).__init__(minv, maxv, n)
self.delta = (math.log(maxv) - math.log(minv)) / (n - 2)
def __getitem__(self, x):
if isinstance(x, slice):
return super(LogarithmicPlusOverflowBins, self).__getitem__(x)
if self.minv <= x < self.maxv:
return 1 + int(math.floor((math.log(x) - math.log(self.minv)) /
self.delta))
if x >= self.maxv:
# infinity overflow bin
return len(self) - 1
if x < self.minv:
# zero overflow bin
return 0
raise IndexError(x)
[docs]
def lower(self):
return numpy.concatenate(
(numpy.array([0.]),
numpy.exp(numpy.linspace(math.log(self.minv), math.log(self.maxv),
len(self) - 1))
)
)
[docs]
def centres(self):
return numpy.concatenate(
(numpy.array([0.]),
numpy.exp(numpy.linspace(math.log(self.minv), math.log(self.maxv) -
self.delta, len(self) - 2) + self.delta / 2.),
numpy.array([PosInf])
)
)
[docs]
def upper(self):
return numpy.concatenate(
(numpy.exp(numpy.linspace(math.log(self.minv), math.log(self.maxv),
len(self) - 1)),
numpy.array([PosInf])
)
)
[docs]
class NDBins(tuple):
"""
Multi-dimensional co-ordinate binning. An instance of this object
is used to convert a tuple of co-ordinates into a tuple of bin
indices. This can be used to allow the contents of an array object
to be accessed with real-valued coordinates.
NDBins is a subclass of the tuple builtin, and is initialized with
an iterable of instances of subclasses of Bins. Each Bins subclass
instance describes the binning to apply in the corresponding
co-ordinate direction, and the number of them sets the dimensions
of the binning.
Example:
>>> x = NDBins((LinearBins(1, 25, 3), LogarithmicBins(1, 25, 3)))
>>> x[1, 1]
(0, 0)
>>> x[1.5, 1]
(0, 0)
>>> x[10, 1]
(1, 0)
>>> x[1, 5]
(0, 1)
>>> x[1, 1:5]
(0, slice(0, 2, None))
>>> x.centres()
(array([ 5., 13., 21.]), array([ 1.70997595, 5. , 14.62008869]))
Note that the co-ordinates to be converted must be a tuple, even if
it is only a 1-dimensional co-ordinate.
"""
def __new__(cls, *args):
new = tuple.__new__(cls, *args)
new.minv = tuple(b.minv for b in new)
new.maxv = tuple(b.maxv for b in new)
new.shape = tuple(len(b) for b in new)
return new
def __getitem__(self, coords):
"""
When coords is a tuple, it is interpreted as an
N-dimensional co-ordinate which is converted to an N-tuple
of bin indices by the Bins instances in this object.
Otherwise coords is interpeted as an index into the tuple,
and the corresponding Bins instance is returned.
Example:
>>> x = NDBins((LinearBins(1, 25, 3), LogarithmicBins(1, 25, 3)))
>>> x[1, 1]
(0, 0)
>>> type(x[1])
<class 'pylal.rate.LogarithmicBins'>
When used to convert co-ordinates to bin indices, each
co-ordinate can be anything the corresponding Bins instance
will accept. Note that the co-ordinates to be converted
must be a tuple, even if it is only a 1-dimensional
co-ordinate.
"""
if isinstance(coords, tuple):
if len(coords) != len(self):
raise ValueError("dimension mismatch")
return tuple(map(lambda b, c: b[c], self, coords))
else:
return tuple.__getitem__(self, coords)
[docs]
def lower(self):
"""
Return a tuple of arrays, where each array contains the
locations of the lower boundaries of the bins in the
corresponding dimension.
"""
return tuple(b.lower() for b in self)
[docs]
def centres(self):
"""
Return a tuple of arrays, where each array contains the
locations of the bin centres for the corresponding
dimension.
"""
return tuple(b.centres() for b in self)
[docs]
def upper(self):
"""
Return a tuple of arrays, where each array contains the
locations of the upper boundaries of the bins in the
corresponding dimension.
"""
return tuple(b.upper() for b in self)
[docs]
class BinnedArray(object):
"""
A convenience wrapper, using the NDBins class to provide access to
the elements of an array object. Technical reasons preclude
providing a subclass of the array object, so the array data is made
available as the "array" attribute of this class.
Examples:
Note that even for 1 dimensional arrays the index must be a tuple.
>>> x = BinnedArray(NDBins((LinearBins(0, 10, 5),)))
>>> x.array
array([ 0., 0., 0., 0., 0.])
>>> x[0,] += 1
>>> x[0.5,] += 1
>>> x.array
array([ 2., 0., 0., 0., 0.])
>>> x.argmax()
(1.0,)
Note the relationship between the binning limits, the bin centres,
and the co-ordinates of the BinnedArray
>>> x = BinnedArray(NDBins((LinearBins(-0.5, 1.5, 2), \
LinearBins(-0.5, 1.5, 2))))
>>> x.bins.centres()
(array([ 0., 1.]), array([ 0., 1.]))
>>> x[0, 0] = 0
>>> x[0, 1] = 1
>>> x[1, 0] = 2
>>> x[1, 1] = 4
>>> x.array
array([[ 0., 1.],
[ 2., 4.]])
>>> x[0, 0]
0.0
>>> x[0, 1]
1.0
>>> x[1, 0]
2.0
>>> x[1, 1]
4.0
>>> x.argmin()
(0.0, 0.0)
>>> x.argmax()
(1.0, 1.0)
"""
def __init__(self, bins, array=None, dtype="double"):
self.bins = bins
if array is None:
self.array = numpy.zeros(bins.shape, dtype=dtype)
else:
if array.shape != bins.shape:
raise ValueError("input array and input bins must have the "
"same shape")
self.array = array
def __getitem__(self, coords):
return self.array[self.bins[coords]]
def __setitem__(self, coords, val):
self.array[self.bins[coords]] = val
def __len__(self):
return len(self.array)
[docs]
def copy(self):
"""
Return a copy of the BinnedArray. The .bins attribute is
shared with the original.
"""
return type(self)(self.bins, self.array.copy())
[docs]
def centres(self):
"""
Return a tuple of arrays containing the bin centres for
each dimension.
"""
return self.bins.centres()
[docs]
def argmin(self):
"""
Return the co-ordinates of the bin centre containing the
minimum value. Same as numpy.argmin(), converting the
indexes to bin co-ordinates.
"""
return tuple(centres[index] for centres, index in
zip(self.centres(), numpy.unravel_index(self.array.argmin(),
self.array.shape)))
[docs]
def argmax(self):
"""
Return the co-ordinates of the bin centre containing the
maximum value. Same as numpy.argmax(), converting the
indexes to bin co-ordinates.
"""
return tuple(centres[index] for centres, index in
zip(self.centres(), numpy.unravel_index(self.array.argmax(),
self.array.shape)))
[docs]
def logregularize(self, epsilon=2**-1074):
"""
Find bins <= 0, and set them to epsilon, This has the
effect of allowing the logarithm of the array to be
evaluated without error.
"""
self.array[self.array <= 0] = epsilon
return self
[docs]
class BinnedRatios(object):
"""
Like BinnedArray, but provides a numerator array and a denominator
array. The incnumerator() method increments a bin in the numerator
by the given weight, and the incdenominator() method increments a
bin in the denominator by the given weight. There are no methods
provided for setting or decrementing either, but the they are
accessible as the numerator and denominator attributes, which are
both BinnedArray objects.
"""
def __init__(self, bins, dtype="double"):
self.numerator = BinnedArray(bins, dtype=dtype)
self.denominator = BinnedArray(bins, dtype=dtype)
def __getitem__(self, coords):
return self.numerator[coords] / self.denominator[coords]
[docs]
def bins(self):
return self.numerator.bins
[docs]
def incnumerator(self, coords, weight=1):
"""
Add weight to the numerator bin at coords.
"""
self.numerator[coords] += weight
[docs]
def incdenominator(self, coords, weight=1):
"""
Add weight to the denominator bin at coords.
"""
self.denominator[coords] += weight
[docs]
def ratio(self):
"""
Compute and return the array of ratios.
"""
return self.numerator.array / self.denominator.array
[docs]
def regularize(self):
"""
Find bins in the denominator that are 0, and set them to 1.
Presumably the corresponding bin in the numerator is also
0, so this has the effect of allowing the ratio array to be
evaluated without error, returning zeros in those bins that
have had no weight added to them.
"""
self.denominator.array[self.denominator.array == 0] = 1
return self
[docs]
def logregularize(self, epsilon=2**-1074):
"""
Find bins in the denominator that are 0, and set them to 1,
while setting the corresponding bin in the numerator to
float epsilon. This has the effect of allowing the
logarithm of the ratio array to be evaluated without error.
"""
self.numerator.array[self.denominator.array == 0] = epsilon
self.denominator.array[self.denominator.array == 0] = 1
return self
[docs]
def centres(self):
"""
Return a tuple of arrays containing the bin centres for
each dimension.
"""
return self.numerator.bins.centres()