Source code for pycbc.io.record

# Copyright (C) 2015  Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                           Preamble
#
# =============================================================================
#
"""
This modules provides definitions of, and helper functions for, FieldArray.
FieldArray are wrappers of numpy recarrays with additional functionality
useful for storing and retrieving data created by a search for gravitationa
waves.
"""

import types, re, copy, numpy, inspect
from igwn_ligolw import types as ligolw_types
from pycbc import coordinates, conversions, cosmology
from pycbc.population import population_models
from pycbc.waveform import parameters

# what functions are given to the eval in FieldArray's __getitem__:
_numpy_function_lib = {_x: _y for _x,_y in numpy.__dict__.items()
                       if isinstance(_y, (numpy.ufunc, float))}

#
# =============================================================================
#
#                           Data type mappings
#
# =============================================================================
#
# add ligolw_types to numpy sctypeDict
# but don't include bindings that numpy already defines
numpy.sctypeDict.update({_k: _val
                         for (_k, _val) in ligolw_types.ToNumPyType.items()
                         if _k not in numpy.sctypeDict})

# Annoyingly, numpy has no way to store NaNs in an integer field to indicate
# the equivalent of None. This can be problematic for fields that store ids:
# if an array has an id field with value 0, it isn't clear if this is because
# the id is the first element, or if no id was set. To clear up the ambiguity,
# we define here an integer to indicate 'id not set'.
ID_NOT_SET = -1
EMPTY_OBJECT = None
VIRTUALFIELD_DTYPE = 'VIRTUAL'

def set_default_empty(array):
    if array.dtype.names is None:
        # scalar dtype, just set
        if array.dtype.str[1] == 'i':
            # integer, set to ID_NOT_SET
            array[:] = ID_NOT_SET
        elif array.dtype.str[1] == 'O':
            # object, set to EMPTY_OBJECT
            array[:] = EMPTY_OBJECT
    else:
        for name in array.dtype.names:
            set_default_empty(array[name])

def default_empty(shape, dtype):
    """Numpy's empty array can have random values in it. To prevent that, we
    define here a default emtpy array. This default empty is a numpy.zeros
    array, except that objects are set to None, and all ints to ID_NOT_SET.
    """
    default = numpy.zeros(shape, dtype=dtype)
    set_default_empty(default)
    return default

# set default data types
_default_types_status = {
    'default_strlen': 50,
    'ilwd_as_int': True,
    'lstring_as_obj': False
}

def lstring_as_obj(true_or_false=None):
    """Toggles whether lstrings should be treated as strings or as objects.
    When FieldArrays is first loaded, the default is True.

    Parameters
    ----------
    true_or_false : {None|bool}
        Pass True to map lstrings to objects; False otherwise. If None
        provided, just returns the current state.

    Return
    ------
    current_stat : bool
        The current state of lstring_as_obj.

    Examples
    --------
    >>> from pycbc.io import FieldArray
    >>> FieldArray.lstring_as_obj()
        True
    >>> FieldArray.FieldArray.from_arrays([numpy.zeros(10)], dtype=[('foo', 'lstring')])
    FieldArray([(0.0,), (0.0,), (0.0,), (0.0,), (0.0,), (0.0,), (0.0,), (0.0,),
           (0.0,), (0.0,)],
          dtype=[('foo', 'O')])
    >>> FieldArray.lstring_as_obj(False)
        False
    >>> FieldArray.FieldArray.from_arrays([numpy.zeros(10)], dtype=[('foo', 'lstring')])
    FieldArray([('0.0',), ('0.0',), ('0.0',), ('0.0',), ('0.0',), ('0.0',),
           ('0.0',), ('0.0',), ('0.0',), ('0.0',)],
          dtype=[('foo', 'S50')])
    """
    if true_or_false is not None:
        _default_types_status['lstring_as_obj'] = true_or_false
        # update the sctypeDict
        numpy.sctypeDict[u'lstring'] = numpy.object_ \
            if _default_types_status['lstring_as_obj'] \
            else 'S%i' % _default_types_status['default_strlen']
    return _default_types_status['lstring_as_obj']

def ilwd_as_int(true_or_false=None):
    """Similar to lstring_as_obj, sets whether or not ilwd:chars should be
    treated as strings or as ints. Default is True.
    """
    if true_or_false is not None:
        _default_types_status['ilwd_as_int'] = true_or_false
        numpy.sctypeDict[u'ilwd:char'] = int \
            if _default_types_status['ilwd_as_int'] \
            else 'S%i' % default_strlen
    return _default_types_status['ilwd_as_int']

def default_strlen(strlen=None):
    """Sets the default string length for lstring and ilwd:char, if they are
    treated as strings. Default is 50.
    """
    if strlen is not None:
        _default_types_status['default_strlen'] = strlen
        # update the sctypeDicts as needed
        lstring_as_obj(_default_types_status['lstring_as_obj'])
        ilwd_as_int(_default_types_status['ilwd_as_int'])
    return _default_types_status['default_strlen']

# set the defaults
lstring_as_obj(True)
ilwd_as_int(True)


#
# =============================================================================
#
#                           Helper functions
#
# =============================================================================
#


#
#   Argument syntax parsing
#
# this parser will pull out sufields as separate identifiers from their parent
# field; e.g., foo.bar --> ['foo', 'bar']
_pyparser = re.compile(r'(?P<identifier>[\w_][\w\d_]*)')
# this parser treats subfields as one identifier with their parent field;
# e.g., foo.bar --> ['foo.bar']
_fieldparser = re.compile(r'(?P<identifier>[\w_][.\w\d_]*)')
def get_vars_from_arg(arg):
    """Given a python string, gets the names of any identifiers use in it.
    For example, if ``arg = '3*narf/foo.bar'``, this will return
    ``set(['narf', 'foo', 'bar'])``.
    """
    return set(_pyparser.findall(arg))

def get_fields_from_arg(arg):
    """Given a python string, gets FieldArray field names used in it. This
    differs from get_vars_from_arg in that any identifier with a '.' in it
    will be treated as one identifier. For example, if
    ``arg = '3*narf/foo.bar'``, this will return ``set(['narf', 'foo.bar'])``.
    """
    return set(_fieldparser.findall(arg))

# this parser looks for fields inside a class method function. This is done by
# looking for variables that start with self.{x} or self["{x}"]; e.g.,
# self.a.b*3 + self.c, self['a.b']*3 + self.c, self.a.b*3 + self["c"], all
# return set('a.b', 'c').
_instfieldparser = re.compile(
    r'''self(?:\.|(?:\[['"]))(?P<identifier>[\w_][.\w\d_]*)''')
def get_instance_fields_from_arg(arg):
    """Given a python string definining a method function on an instance of an
    FieldArray, returns the field names used in it. This differs from
    get_fields_from_arg in that it looks for variables that start with 'self'.
    """
    return set(_instfieldparser.findall(arg))

def get_needed_fieldnames(arr, names):
    """Given a FieldArray-like array and a list of names, determines what
    fields are needed from the array so that using the names does not result
    in an error.

    Parameters
    ----------
    arr : instance of a FieldArray or similar
        The array from which to determine what fields to get.
    names : (list of) strings
        A list of the names that are desired. The names may be either a field,
        a virtualfield, a property, a method of ``arr``, or any function of
        these. If a virtualfield/property or a method, the source code of that
        property/method will be analyzed to pull out what fields are used in
        it.

    Returns
    -------
    set
        The set of the fields needed to evaluate the names.
    """
    fieldnames = set([])
    # we'll need the class that the array is an instance of to evaluate some
    # things
    cls = arr.__class__
    if isinstance(names, str):
        names = [names]
    # parse names for variables, incase some of them are functions of fields
    parsed_names = set([])
    for name in names:
        parsed_names.update(get_fields_from_arg(name))
    # only include things that are in the array's namespace
    names = list(parsed_names & (set(dir(arr)) | set(arr.fieldnames)))
    for name in names:
        if name in arr.fieldnames:
            # is a field, just add the name
            fieldnames.update([name])
        else:
            # the name is either a virtualfield, a method, or some other
            # property; we need to evaluate the source code to figure out what
            # fields we need
            try:
                # the underlying functions of properties need to be retrieved
                # using their fget attribute
                func = getattr(cls, name).fget
            except AttributeError:
                # no fget attribute, assume is an instance method
                func = getattr(arr, name)
            # evaluate the source code of the function
            try:
                sourcecode = inspect.getsource(func)
            except TypeError:
                # not a function, just pass
                continue
            # evaluate the source code for the fields
            possible_fields = get_instance_fields_from_arg(sourcecode)
            # some of the variables returned by possible fields may themselves
            # be methods/properties that depend on other fields. For instance,
            # mchirp relies on eta and mtotal, which each use mass1 and mass2;
            # we therefore need to anayze each of the possible fields
            fieldnames.update(get_needed_fieldnames(arr, possible_fields))
    return fieldnames


def get_dtype_descr(dtype):
    """Numpy's ``dtype.descr`` will return empty void fields if a dtype has
    offsets specified. This function tries to fix that by not including
    fields that have no names and are void types.
    """
    dts = []
    for dt in dtype.descr:
        if (dt[0] == '' and dt[1][1] == 'V'):
            continue

        # Downstream codes (numpy, etc) can't handle metadata in dtype
        if isinstance(dt[1], tuple):
            dt = (dt[0], dt[1][0])

        dts.append(dt)
    return dts


def combine_fields(dtypes):
    """Combines the fields in the list of given dtypes into a single dtype.

    Parameters
    ----------
    dtypes : (list of) numpy.dtype(s)
        Either a numpy.dtype, or a list of numpy.dtypes.

    Returns
    -------
    numpy.dtype
        A new dtype combining the fields in the list of dtypes.
    """
    if not isinstance(dtypes, list):
        dtypes = [dtypes]
    # Note: incase any of the dtypes have offsets, we won't include any fields
    # that have no names and are void
    new_dt = numpy.dtype([dt for dtype in dtypes \
        for dt in get_dtype_descr(dtype)])
    return new_dt


def _ensure_array_list(arrays):
    """Ensures that every element in a list is an instance of a numpy array."""
    # Note: the isinstance test is needed below so that instances of FieldArray
    # are not converted to numpy arrays
    return [numpy.array(arr, ndmin=1) if not isinstance(arr, numpy.ndarray)
            else arr for arr in arrays]


def merge_arrays(merge_list, names=None, flatten=True, outtype=None):
    """Merges the given arrays into a single array. The arrays must all have
    the same shape. If one or more of the given arrays has multiple fields,
    all of the fields will be included as separate fields in the new array.

    Parameters
    ----------
    merge_list : list of arrays
        The list of arrays to merge.
    names : {None | sequence of strings}
        Optional, the names of the fields in the output array. If flatten is
        True, must be the same length as the total number of fields in
        merge_list.  Otherise, must be the same length as the number of
        arrays in merge_list.  If None provided, and flatten is True, names
        used will be the same as the name of the fields in the given arrays.
        If the datatype has no name, or flatten is False, the new field will
        be `fi` where i is the index of the array in arrays.
    flatten : bool
        Make all of the fields in the given arrays separate fields in the
        new array. Otherwise, each array will be added as a field. If an
        array has fields, they will be subfields in the output array. Default
        is True.
    outtype : {None | class}
        Cast the new array to the given type. Default is to return a
        numpy structured array.

    Returns
    -------
    new array : {numpy.ndarray | outtype}
        A new array with all of the fields in all of the arrays merged into
        a single array.
    """
    # make sure everything in merge_list is an array
    merge_list = _ensure_array_list(merge_list)
    if not all(merge_list[0].shape == arr.shape for arr in merge_list):
        raise ValueError("all of the arrays in merge_list must have the " +
            "same shape")
    if flatten:
        new_dt = combine_fields([arr.dtype for arr in merge_list])
    else:
        new_dt = numpy.dtype([('f%i' %ii, arr.dtype.descr) \
            for ii,arr in enumerate(merge_list)])
    new_arr = merge_list[0].__class__(merge_list[0].shape, dtype=new_dt)
    # ii is a counter to keep track of which fields from the new array
    # go with which arrays in merge list
    ii = 0
    for arr in merge_list:
        if arr.dtype.names is None:
            new_arr[new_dt.names[ii]] = arr
            ii += 1
        else:
            for field in arr.dtype.names:
                new_arr[field] = arr[field]
                ii += 1
    # set the names if desired
    if names is not None:
        new_arr.dtype.names = names
    # ditto the outtype
    if outtype is not None:
        new_arr = new_arr.view(type=outtype)
    return new_arr

def add_fields(input_array, arrays, names=None, assubarray=False):
    """Adds the given array(s) as new field(s) to the given input array.
    Returns a new instance of the input_array with the new fields added.

    Parameters
    ----------
    input_array : instance of a numpy.ndarray or numpy recarray
        The array to to add the fields to.
    arrays : (list of) numpy array(s)
        The arrays to add. If adding multiple arrays, must be a list;
        if adding a single array, can just be that array.
    names : (list of) strings
        Optional, the name(s) of the new fields in the output array. If
        adding multiple fields, must be a list of strings with the same
        length as the list of arrays. If None provided, names used will
        be the same as the name of the datatype in the given arrays.
        If the datatype has no name, the new field will be ``'fi'`` where
        i is the index of the array in arrays.
    assubarray : bool
        Add the list of arrays as a single subarray field. If True, and names
        provided, names should be a string or a length-1 sequence. Default is
        False, in which case each array will be added as a separate field.

    Returns
    -------
    new_array : new instance of `input_array`
        A copy of the `input_array` with the desired fields added.
    """
    if not isinstance(arrays, list):
        arrays = [arrays]
    # ensure that all arrays in arrays are arrays
    arrays = _ensure_array_list(arrays)
    # set the names
    if names is not None:
        if isinstance(names, str):
            names = [names]
        # check if any names are subarray names; if so, we have to add them
        # separately
        subarray_names = [name for name in names if len(name.split('.')) > 1]
    else:
        subarray_names = []
    if any(subarray_names):
        subarrays = [arrays[ii] for ii,name in enumerate(names) \
            if name in subarray_names]
        # group together by subarray
        groups = {}
        for name,arr in zip(subarray_names, subarrays):
            key = name.split('.')[0]
            subkey = '.'.join(name.split('.')[1:])
            try:
                groups[key].append((subkey, arr))
            except KeyError:
                groups[key] = [(subkey, arr)]
        # now cycle over the groups, adding all of the fields in each group
        # as a subarray
        for group_name in groups:
            # we'll create a dictionary out of the subarray field names ->
            # subarrays
            thisdict = dict(groups[group_name])
            # check if the input array has this field; if so, remove it, then
            # add it back with the other new arrays
            if group_name in input_array.fieldnames:
                # get the data
                new_subarray = input_array[group_name]
                # add the new fields to the subarray
                new_subarray = add_fields(new_subarray, thisdict.values(),
                    thisdict.keys())
                # remove the original from the input array
                input_array = input_array.without_fields(group_name)
            else:
                new_subarray = thisdict.values()
            # add the new subarray to input_array as a subarray
            input_array = add_fields(input_array, new_subarray,
                names=group_name, assubarray=True)
            # set the subarray names
            input_array[group_name].dtype.names = thisdict.keys()
        # remove the subarray names from names
        keep_idx = [ii for ii,name in enumerate(names) \
            if name not in subarray_names]
        names = [names[ii] for ii in keep_idx]
        # if there's nothing left, just return
        if names == []:
            return input_array
        # also remove the subarray arrays
        arrays = [arrays[ii] for ii in keep_idx]
    if assubarray:
        # merge all of the arrays into a single array
        if len(arrays) > 1:
            arrays = [merge_arrays(arrays, flatten=True)]
        # now merge all the fields as a single subarray
        merged_arr = numpy.empty(len(arrays[0]),
            dtype=[('f0', arrays[0].dtype.descr)])
        merged_arr['f0'] = arrays[0]
        arrays = [merged_arr]
    merge_list = [input_array] + arrays
    if names is not None:
        names = list(input_array.dtype.names) + names
    # merge into a single array
    return merge_arrays(merge_list, names=names, flatten=True,
        outtype=type(input_array))


#
# =============================================================================
#
#                           Base FieldArray definitions
#
# =============================================================================
#

# We'll include functions in various pycbc modules in FieldArray's function
# library. All modules used must have an __all__ list defined.
_modules_for_functionlib = [conversions, coordinates, cosmology,
                            population_models]
_fieldarray_functionlib = {_funcname : getattr(_mod, _funcname)
                              for _mod in _modules_for_functionlib
                              for _funcname in getattr(_mod, '__all__')}


[docs]
class FieldArray(numpy.recarray):
    """
    Subclass of numpy.recarray that adds additional functionality.

    Initialization is done the same way as numpy.recarray, with the addition
    that a "name" attribute can be passed to name the output array. When you
    initialize an array it creates a new zeroed array. This is similar to
    numpy.recarray, except that ``numpy.recarray(shape)`` will create an empty
    array, whereas here the default is to zero all of the elements (see
    ``default_zero`` for definition of zero for different data types). If you
    prefer an empty array, set ``zero=False`` when initializing.

    You cannot pass an array or sequence as input as you do with numpy.array.
    To initialize an FieldArray from an already existing arrays, use the
    ``FieldArray.from_arrays`` class method. To initialize from a list of
    tuples, use ``FieldArray.from_records``. See the docstring for those methods
    for details. For more information on initalizing an empty array, see
    ``numpy.recarray`` help.

    Some additional features:

    * **Arbitrary functions**:

      You can retrive functions on fields in the same manner that you access
      individual fields. For example, if you have a FieldArray ``x`` with
      fields ``a`` and ``b``, you can access each field with
      ``x['a'], x['b']``.  You can also do ``x['a*b/(a+b)**2.']``,
      ``x[cos(a)*sin(b)]``, etc. Boolean operations are also possible, e.g.,
      ``x['(a < 3) & (b < 2)']``. Syntax for functions is python. Any numpy
      ufunc, as well as all functions listed in the functionlib attribute, may
      be used. Note that while fields may be accessed as attributes (e.g,
      field ``a`` can be accessed via ``x['a']`` or ``x.a``), functions on
      multiple fields may not (``x.a+b`` does not work, for obvious reasons).

    * **Subfields and '.' indexing**:
      Structured arrays, which are the base class for recarrays and, by
      inheritance, FieldArray, allows for fields to themselves have fields. For
      example, an array ``x`` may have fields ``a`` and ``b``, with ``b`` having
      subfields ``c`` and ``d``. You can access subfields using other index
      notation or attribute notation. So, the subfields ``d`` may be retrieved
      via ``x['b']['d']``, ``x.b.d``, ``x['b'].d`` or ``x['b.d']``. Likewise,
      functions can be carried out on the subfields, as they can on fields. If
      ``d`` is a float field, we could get the log of it via ``x['log(b.d)']``.
      There is no limit to the number of subfields. So, ``c`` could also have
      subfield ``c0``, which would be accessed via ``x.c.c0``, or any of the
      other methods.

    .. warning::
       Record arrays also allow you to set values of a field using attribute
       notation. However, this can lead to unexpected results if you
       accidently misspell the attribute. For example, if ``x`` has field
       ``foo``, and you misspell this when setting, e.g., you try to do
       ``x.fooo = numpy.arange(x.size)``, ``foo`` will not be set, nor will
       you get an error. Instead, the attribute ``fooo`` will be added to
       ``x``. If you tried to do this using index notation, however ---
       ``x['fooo'] = numpy.arange(x.size)`` --- you will
       get an ``AttributeError`` as you might expect. For this reason, it is
       recommended that you always use index notation when *setting* values;
       you can use either index or attribute notation when *retrieving*
       values.

    * **Properties and methods as fields**:
      If a propety or instance method is defined for a class that inherits from
      FieldArray, those can be accessed in the same way as fields are. For
      example, define ``Foo`` as:

    .. code-block:: python

        class Foo(FieldArray):
            @property
            def bar(self):
                return self['a']**2.

            def narf(self, y):
                return self['a'] + y

    Then if we have an instance: ``foo = Foo(100, dtype=[('a', float)])``.
    The ``bar`` and ``narf`` attributes may be accessed via field notation:
    ``foo.bar``, ``foo['bar']``, ``foo.narf(10)`` and ``foo['narf(10)']``.

    * **Virtual fields**:
      Virtual fields are methods wrapped as properties that operate on one or
      more fields, thus returning an array of values. To outside code virtual
      fields look the same as fields, and can be called similarily. Internally,
      no additional data is stored; the operation is performed on the fly when
      the virtual field is called. Virtual fields can be added to an array
      instance with the add_virtualfields method. Alternatively, virtual fields
      can be defined by sub-classing FieldArray:

    .. code-block:: python

        class Foo(FieldArray):
            _virtualfields = ['bar']
            @property
            def bar(self):
                return self['a']**2.

    The fields property returns the names of both fields and virtual fields.

    .. note::

        It can happen that a field, virtual field, or function in the
        functionlib have that same name. In that case, precedence is: field,
        virtual field, function. For example, if a function called 'foo' is in
        the function library, and a virtual field is added call 'foo', then
        `a['foo']` will return the virtual field rather than the function.
        Likewise, if the array is initialized with a field called `foo`, or a
        field with that name is added, `a['foo']` will return that field
        rather than the virtual field and/or the function.

    Parameters
    ----------
    shape : {int | tuple}
        The shape of the new array.
    name : {None | str}
        Optional, what to name the new array. The array's ``name`` attribute
        is set to this.

    For details on other keyword arguments, see ``numpy.recarray`` help.

    Attributes
    ----------
    name : str
        Instance attribute. The name of the array.

    Examples
    --------
    .. note:: For some predefined arrays with default fields, see the other
        array classes defined below.

    Create an empty array with four rows and two fields called `foo` and
    `bar`, both of which are floats:

    >>> x = FieldArray(4, dtype=[('foo', float), ('bar', float)])

    Set/retrieve a fields using index or attribute syntax:

    >>> x['foo'] = [1.,2.,3.,4.]
    >>> x['bar'] = [5.,6.,7.,8.]
    >>> x
    FieldArray([(1.0, 5.0), (2.0, 6.0), (3.0, 7.0), (4.0, 8.0)],
          dtype=[('foo', '<f8'), ('bar', '<f8')])
    >>> x.foo
        array([ 1.,  2.,  3.,  4.])
    >>> x['bar']
        array([ 5.,  6.,  7.,  8.])

    Get the names of the fields:

    >>> x.fieldnames
        ('foo', 'bar')

    Rename the fields to `a` and `b`:

    >>> x.dtype.names = ['a', 'b']
    >>> x.fieldnames
        ('a', 'b')

    Retrieve a function of the fields as if it were a field:

    >>> x['sin(a/b)']
    array([ 0.19866933,  0.3271947 ,  0.41557185,  0.47942554])

    Add a virtual field:

    >>> def c(self):
    ...     return self['a'] + self['b']
    ...
    >>> x = x.add_virtualfields('c', c)
    >>> x.fields
    ('a', 'b', 'c')
    >>> x['c']
    array([  6.,   8.,  10.,  12.])

    Create an array with subfields:

    >>> x = FieldArray(4, dtype=[('foo', [('cat', float), ('hat', int)]), ('bar', float)])
    >>> x.fieldnames
        ['foo.cat', 'foo.hat', 'bar']

    Load from a list of arrays (in this case, from an hdf5 file):

    >>> bankhdf = h5py.File('bank/H1L1-BANK2HDF-1117400416-928800.hdf')
    >>> bankhdf.keys()
        [u'mass1', u'mass2', u'spin1z', u'spin2z', u'template_hash']
    >>> templates = FieldArray.from_arrays(bankhdf.values(), names=bankhdf.keys())
    >>> templates.fieldnames
        ('mass1', 'mass2', 'spin1z', 'spin2z', 'template_hash')
    >>> templates.mass1
    array([ 1.71731389,  1.10231435,  2.99999857, ...,  1.67488706,
            1.00531888,  2.11106491], dtype=float32)

    Sort by a field without having to worry about also sorting the other
    fields:

    >>> templates[['mass1', 'mass2']]
    array([(1.7173138856887817, 1.2124452590942383),
           (1.1023143529891968, 1.0074082612991333),
           (2.9999985694885254, 1.0578444004058838), ...,
           (1.6748870611190796, 1.1758257150650024),
           (1.0053188800811768, 1.0020891427993774),
           (2.111064910888672, 1.0143394470214844)],
          dtype=[('mass1', '<f4'), ('mass2', '<f4')])
    >>> templates.sort(order='mass1')
    >>> templates[['mass1', 'mass2']]
    array([(1.000025987625122, 1.0000133514404297),
           (1.0002814531326294, 1.0002814531326294),
           (1.0005437135696411, 1.0005437135696411), ...,
           (2.999999523162842, 1.371169090270996),
           (2.999999523162842, 1.4072519540786743), (3.0, 1.4617927074432373)],
          dtype=[('mass1', '<f4'), ('mass2', '<f4')])

    Convert a LIGOLW xml table:

    >>> type(sim_table)
        igwn_ligolw.lsctables.SimInspiralTable
    >>> sim_array = FieldArray.from_ligolw_table(sim_table)
    >>> sim_array.mass1
    array([ 2.27440691,  1.85058105,  1.61507106, ...,  2.0504961 ,
            2.33554196,  2.02732205], dtype=float32)
    >>> sim_array.waveform
    array([u'SpinTaylorT2', u'SpinTaylorT2', u'SpinTaylorT2', ...,
           u'SpinTaylorT2', u'SpinTaylorT2', u'SpinTaylorT2'], dtype=object)

    >>> sim_array = FieldArray.from_ligolw_table(sim_table, columns=['simulation_id', 'mass1', 'mass2'])
    >>> sim_array
    FieldArray([(0, 2.274406909942627, 2.6340370178222656),
           (1, 1.8505810499191284, 2.8336880207061768),
           (2, 1.6150710582733154, 2.2336490154266357), ...,
           (11607, 2.0504961013793945, 2.6019821166992188),
           (11608, 2.3355419635772705, 1.2164380550384521),
           (11609, 2.0273220539093018, 2.2453839778900146)],
          dtype=[('simulation_id', '<i8'), ('mass1', '<f4'), ('mass2', '<f4')])

    Add a field to the array:

    >>> optimal_snrs = numpy.random.uniform(4.,40., size=len(sim_array))
    >>> sim_array = sim_array.add_fields(optimal_snrs, 'optimal_snrs')
    >>> sim_array.fieldnames
        ('simulation_id', 'mass1', 'mass2', 'optimal_snrs')

    Notes
    -----
    Input arrays with variable-length strings in one or more fields can be
    tricky to deal with. Numpy arrays are designed to use fixed-length
    datasets, so that quick memory access can be achieved. To deal with
    variable-length strings, there are two options: 1. set the data type to
    object, or 2. set the data type to a string with a fixed length larger
    than the longest string in the input array.

    The first option, using objects, essentially causes the array to store a
    pointer to the string.  This is the most flexible option, as it allows
    strings in the array to be updated to any length. However, operations on
    object fields are slower, as numpy cannot take advantage of its fast
    memory striding abilities (see `this question/answer on stackoverflow
    <http://stackoverflow.com/a/14639568/1366472>`_ for details). Also,
    numpy's support of object arrays is more limited.  In particular, prior
    to version 1.9.2, you cannot create a view of an array that changes the
    dtype if the array has any fields that are objects, even if the view does
    not affect the object fields. (This has since been relaxed.)

    The second option, using strings of a fixed length, solves the issues
    with object fields. However, if you try to change one of the strings
    after the array is created, the string will be truncated at whatever
    string length is used. Additionally, if you choose too large of a string
    length, you can substantially increase the memory overhead for large
    arrays.

    """
    _virtualfields = []
    _functionlib = _fieldarray_functionlib
    __persistent_attributes__ = ['name', '_virtualfields', '_functionlib']

    def __new__(cls, shape, name=None, zero=True, **kwargs):
        """Initializes a new empty array.
        """
        obj = super(FieldArray, cls).__new__(cls, shape, **kwargs).view(
            type=cls)
        obj.name = name
        obj.__persistent_attributes__ = [a
            for a in cls.__persistent_attributes__]
        obj._functionlib = {f: func for f,func in cls._functionlib.items()}
        obj._virtualfields = [f for f in cls._virtualfields]
        # zero out the array if desired
        if zero:
            default = default_empty(1, dtype=obj.dtype)
            obj[:] = default
        return obj

    def __array_finalize__(self, obj):
        """Default values are set here.

        See <https://docs.scipy.org/doc/numpy/user/basics.subclassing.html> for
        details.
        """
        if obj is None:
            return
        # copy persistent attributes
        try:
            obj.__copy_attributes__(self)
        except AttributeError:
            pass

    def __copy_attributes__(self, other, default=None):
        """Copies the values of all of the attributes listed in
        `self.__persistent_attributes__` to other.
        """
        [setattr(other, attr, copy.deepcopy(getattr(self, attr, default))) \
            for attr in self.__persistent_attributes__]

    def __getattribute__(self, attr, no_fallback=False):
        """Allows fields to be accessed as attributes.
        """
        # first try to get the attribute
        try:
            return numpy.ndarray.__getattribute__(self, attr)
        except AttributeError as e:
            # don't try getitem, which might get back here
            if no_fallback:
                raise(e)

            # might be a field, try to retrive it using getitem
            if attr in self.fields:
                return self.__getitem__(attr)
            # otherwise, unrecognized
            raise AttributeError(e)

    def __setitem__(self, item, values):
        """Wrap's recarray's setitem to allow attribute-like indexing when
        setting values.
        """
        if type(item) is int and type(values) is numpy.ndarray:
            # numpy >=1.14 only accepts tuples
            values = tuple(values)
        try:
            return super(FieldArray, self).__setitem__(item, values)
        except ValueError:
            # we'll get a ValueError if a subarray is being referenced using
            # '.'; so we'll try to parse it out here
            fields = item.split('.')
            if len(fields) > 1:
                for field in fields[:-1]:
                    self = self[field]
                item = fields[-1]
            # now try again
            return super(FieldArray, self).__setitem__(item, values)

    def __getbaseitem__(self, item):
        """Gets an item assuming item is either an index or a fieldname.
        """
        # We cast to a ndarray to avoid calling array_finalize, which can be
        # slow
        out = self.view(numpy.ndarray)[item]
        # if there are no fields, then we can just return
        if out.dtype.fields is None:
            return out
        # if there are fields, but only a single entry, we'd just get a
        # record by casting to self, so just cast immediately to recarray
        elif out.ndim == 0:
            return out.view(numpy.recarray)
        # otherwise, cast back to an instance of self
        else:
            return out.view(type(self))

    def __getsubitem__(self, item):
        """Gets a subfield using `field.subfield` notation.
        """
        try:
            return self.__getbaseitem__(item)
        except ValueError as err:
            subitems = item.split('.')
            if len(subitems) > 1:
                return self.__getbaseitem__(subitems[0]
                    ).__getsubitem__('.'.join(subitems[1:]))
            else:
                raise ValueError(err)

    def __getitem__(self, item):
        """Wraps recarray's  `__getitem__` so that math functions on fields and
        attributes can be retrieved. Any function in numpy's library may be
        used.
        """
        try:
            return self.__getsubitem__(item)
        except ValueError:
            #
            #   arg isn't a simple argument of row, so we'll have to eval it
            #
            if not hasattr(self, '_code_cache'):
                self._code_cache = {}

            if item not in self._code_cache:
                code = compile(item, '<string>', 'eval')

                # get the function library
                item_dict = dict(_numpy_function_lib.items())
                item_dict.update(self._functionlib)

                # parse to get possible fields
                itemvars_raw = get_fields_from_arg(item)

                itemvars = []
                for it in itemvars_raw:
                    try:
                        float(it)
                        is_num = True
                    except ValueError:
                        is_num = False

                    if not is_num:
                        itemvars.append(it)

                self._code_cache[item] = (code, itemvars, item_dict)

            code, itemvars, item_dict = self._code_cache[item]
            added = {}
            for it in itemvars:
                if it in self.fieldnames:
                    # pull out the fields: note, by getting the parent fields
                    # we also get the sub fields name
                    added[it] = self.__getbaseitem__(it)
                elif (it in self.__dict__) or (it in self._virtualfields):
                    # pull out any needed attributes
                    added[it] = self.__getattribute__(it, no_fallback=True)
                else:
                    # add any aliases
                    aliases = self.aliases
                    if it in aliases:
                        added[it] = self.__getbaseitem__(aliases[it])
            if item_dict is not None:
                item_dict.update(added)

            ans = eval(code, {"__builtins__": None}, item_dict)
            for k in added:
                item_dict.pop(k)
            return ans

    def __contains__(self, field):
        """Returns True if the given field name is in self's fields."""
        return field in self.fields


[docs]
    def sort(self, axis=-1, kind='quicksort', order=None):
        """Sort an array, in-place.

        This function extends the standard numpy record array in-place sort
        to allow the basic use of Field array virtual fields. Only a single
        field is currently supported when referencing a virtual field.

        Parameters
        ----------
        axis : int, optional
            Axis along which to sort. Default is -1, which means sort along the
            last axis.
        kind : {'quicksort', 'mergesort', 'heapsort'}, optional
            Sorting algorithm. Default is 'quicksort'.
        order : list, optional
            When `a` is an array with fields defined, this argument specifies
            which fields to compare first, second, etc.  Not all fields need be
            specified.
        """
        try:
            numpy.recarray.sort(self, axis=axis, kind=kind, order=order)
        except ValueError:
            if isinstance(order, list):
                raise ValueError("Cannot process more than one order field")
            self[:] = self[numpy.argsort(self[order])]



[docs]
    def addattr(self, attrname, value=None, persistent=True):
        """Adds an attribute to self. If persistent is True, the attribute will
        be made a persistent attribute. Persistent attributes are copied
        whenever a view or copy of this array is created. Otherwise, new views
        or copies of this will not have the attribute.
        """
        setattr(self, attrname, value)
        # add as persistent
        if persistent and attrname not in self.__persistent_attributes__:
            self.__persistent_attributes__.append(attrname)



[docs]
    def add_methods(self, names, methods):
        """Adds the given method(s) as instance method(s) of self. The
        method(s) must take `self` as a first argument.
        """
        if isinstance(names, str):
            names = [names]
            methods = [methods]
        for name,method in zip(names, methods):
            setattr(self, name, types.MethodType(method, self))



[docs]
    def add_properties(self, names, methods):
        """Returns a view of self with the given methods added as properties.

        From: <http://stackoverflow.com/a/2954373/1366472>.
        """
        cls = type(self)
        cls = type(cls.__name__, (cls,), dict(cls.__dict__))
        if isinstance(names, str):
            names = [names]
            methods = [methods]
        for name,method in zip(names, methods):
            setattr(cls, name, property(method))
        return self.view(type=cls)



[docs]
    def add_virtualfields(self, names, methods):
        """Returns a view of this array with the given methods added as virtual
        fields. Specifically, the given methods are added using add_properties
        and their names are added to the list of virtual fields. Virtual fields
        are properties that are assumed to operate on one or more of self's
        fields, thus returning an array of values.
        """
        if isinstance(names, str):
            names = [names]
            methods = [methods]
        out = self.add_properties(names, methods)
        if out._virtualfields is None:
            out._virtualfields = []
        out._virtualfields.extend(names)
        return out



[docs]
    def add_functions(self, names, functions):
        """Adds the given functions to the function library.

        Functions are added to this instance of the array; all copies of
        and slices of this array will also have the new functions included.

        Parameters
        ----------
        names : (list of) string(s)
            Name or list of names of the functions.
        functions : (list of) function(s)
            The function(s) to call.
        """
        if isinstance(names, str):
            names = [names]
            functions = [functions]
        if len(functions) != len(names):
            raise ValueError("number of provided names must be same as number "
                             "of functions")
        self._functionlib.update(dict(zip(names, functions)))



[docs]
    def del_functions(self, names):
        """Removes the specified function names from the function library.

        Functions are removed from this instance of the array; all copies
        and slices of this array will also have the functions removed.

        Parameters
        ----------
        names : (list of) string(s)
            Name or list of names of the functions to remove.
        """
        if isinstance(names, str):
            names = [names]
        for name in names:
            self._functionlib.pop(name)



[docs]
    @classmethod
    def from_arrays(cls, arrays, name=None, **kwargs):
        """Creates a new instance of self from the given (list of) array(s).
        This is done by calling numpy.rec.fromarrays on the given arrays with
        the given kwargs. The type of the returned array is cast to this
        class, and the name (if provided) is set.

        Parameters
        ----------
        arrays : (list of) numpy array(s)
            A list of the arrays to create the FieldArray from.
        name : {None|str}
            What the output array should be named.

        For other keyword parameters, see the numpy.rec.fromarrays help.

        Returns
        -------
        array : instance of this class
            An array that is an instance of this class in which the field
            data is from the given array(s).
        """
        obj = numpy.rec.fromarrays(arrays, **kwargs).view(type=cls)
        obj.name = name
        return obj



[docs]
    @classmethod
    def from_records(cls, records, name=None, **kwargs):
        """Creates a new instance of self from the given (list of) record(s).

        A "record" is a tuple in which each element is the value of one field
        in the resulting record array. This is done by calling
        `numpy.rec.fromrecords` on the given records with the given kwargs.
        The type of the returned array is cast to this class, and the name
        (if provided) is set.

        Parameters
        ----------
        records : (list of) tuple(s)
            A list of the tuples to create the FieldArray from.
        name : {None|str}
            What the output array should be named.

        Other Parameters
        ----------------
        For other keyword parameters, see the `numpy.rec.fromrecords` help.

        Returns
        -------
        array : instance of this class
            An array that is an instance of this class in which the field
            data is from the given record(s).
        """
        obj = numpy.rec.fromrecords(records, **kwargs).view(
            type=cls)
        obj.name = name
        return obj



[docs]
    @classmethod
    def from_kwargs(cls, **kwargs):
        """Creates a new instance of self from the given keyword arguments.
        Each argument will correspond to a field in the returned array, with
        the name of the field given by the keyword, and the value(s) whatever
        the keyword was set to. Each keyword may be set to a single value or
        a list of values. The number of values that each argument is set to
        must be the same; this will be the size of the returned array.

        Examples
        --------
        Create an array with fields 'mass1' and 'mass2':
        >>> a = FieldArray.from_kwargs(mass1=[1.1, 3.], mass2=[2., 3.])
        >>> a.fieldnames
        ('mass1', 'mass2')
        >>> a.mass1, a.mass2
        (array([ 1.1,  3. ]), array([ 2.,  3.]))

        Create an array with only a single element in it:
        >>> a = FieldArray.from_kwargs(mass1=1.1, mass2=2.)
        >>> a.mass1, a.mass2
        (array([ 1.1]), array([ 2.]))
        """
        arrays = []
        names = []
        for p,vals in kwargs.items():
            if not isinstance(vals, numpy.ndarray):
                if not isinstance(vals, list):
                    vals = [vals]
                vals = numpy.array(vals)
            arrays.append(vals)
            names.append(p)
        return cls.from_arrays(arrays, names=names)




[docs]
    @classmethod
    def from_ligolw_table(cls, table, columns=None, cast_to_dtypes=None):
        """Converts the given ligolw table into an FieldArray. The `tableName`
        attribute is copied to the array's `name`.

        Parameters
        ----------
        table : LIGOLw table instance
            The table to convert.
        columns : {None|list}
            Optionally specify a list of columns to retrieve. All of the
            columns must be in the table's validcolumns attribute. If None
            provided, all the columns in the table will be converted.
        dtype : {None | dict}
            Override the columns' dtypes using the given dictionary. The
            dictionary should be keyed by the column names, with the values
            a tuple that can be understood by numpy.dtype. For example, to
            cast a ligolw column called "foo" to a field called "bar" with
            type float, cast_to_dtypes would be: ``{"foo": ("bar", float)}``.

        Returns
        -------
        array : FieldArray
            The input table as an FieldArray.
        """
        name = table.tableName.split(':')[0]
        if columns is None:
            # get all the columns
            columns = table.validcolumns
        else:
            # note: this will raise a KeyError if one or more columns is
            # not in the table's validcolumns
            new_columns = {}
            for col in columns:
                new_columns[col] = table.validcolumns[col]
            columns = new_columns
        if cast_to_dtypes is not None:
            dtype = [cast_to_dtypes[col] for col in columns]
        else:
            dtype = list(columns.items())
        # get the values
        if _default_types_status['ilwd_as_int']:
            # columns like `process:process_id` have corresponding attributes
            # with names that are only the part after the colon, so we split
            input_array = \
                [tuple(getattr(row, col.split(':')[-1]) if dt != 'ilwd:char'
                       else int(getattr(row, col))
                       for col,dt in columns.items())
                 for row in table]
        else:
            input_array = \
                [tuple(getattr(row, col) for col in columns) for row in table]
        # return the values as an instance of cls
        return cls.from_records(input_array, dtype=dtype,
            name=name)



[docs]
    def to_array(self, fields=None, axis=0):
        """Returns an `numpy.ndarray` of self in which the fields are included
        as an extra dimension.

        Parameters
        ----------
        fields : {None, (list of) strings}
            The fields to get. All of the fields must have the same datatype.
            If None, will try to return all of the fields.
        axis : {0, int}
            Which dimension to put the fields in in the returned array. For
            example, if `self` has shape `(l,m,n)` and `k` fields, the
            returned array will have shape `(k,l,m,n)` if `axis=0`, `(l,k,m,n)`
            if `axis=1`, etc. Setting `axis=-1` will put the fields in the
            last dimension. Default is 0.

        Returns
        -------
        numpy.ndarray
            The desired fields as a numpy array.
        """
        if fields is None:
            fields = self.fieldnames
        if isinstance(fields, str):
            fields = [fields]
        return numpy.stack([self[f] for f in fields], axis=axis)


    @property
    def fieldnames(self):
        """Returns a tuple listing the field names in self. Equivalent to
        `array.dtype.names`, where `array` is self.
        """
        return self.dtype.names

    @property
    def virtualfields(self):
        """Returns a tuple listing the names of virtual fields in self.
        """
        if self._virtualfields is None:
            vfs = tuple()
        else:
            vfs = tuple(self._virtualfields)
        return vfs

    @property
    def functionlib(self):
        """Returns the library of functions that are available when calling
        items.
        """
        return self._functionlib

    @property
    def fields(self):
        """Returns a tuple listing the names of fields and virtual fields in
        self."""
        return tuple(list(self.fieldnames) + list(self.virtualfields))

    @property
    def aliases(self):
        """Returns a dictionary of the aliases, or "titles", of the field names
        in self. An alias can be specified by passing a tuple in the name
        part of the dtype. For example, if an array is created with
        ``dtype=[(('foo', 'bar'), float)]``, the array will have a field
        called `bar` that has alias `foo` that can be accessed using
        either `arr['foo']` or `arr['bar']`. Note that the first string
        in the dtype is the alias, the second the name. This function returns
        a dictionary in which the aliases are the keys and the names are the
        values. Only fields that have aliases are returned.
        """
        return dict(c[0] for c in self.dtype.descr if isinstance(c[0], tuple))


[docs]
    def add_fields(self, arrays, names=None, assubarray=False):
        """
        Adds the given arrays as new fields to self.  Returns a new instance
        with the new fields added. Note: this array does not change; the
        returned array is a new copy.

        Parameters
        ----------
        arrays : (list of) numpy array(s)
            The arrays to add. If adding multiple arrays, must be a list;
            if adding a single array, can just be that array.
        names : (list of) strings
            Optional, the name(s) of the new fields in the output array. If
            adding multiple fields, must be a list of strings with the same
            length as the list of arrays. If None provided, names used will
            be the same as the name of the datatype in the given arrays.
            If the datatype has no name, the new field will be ``'fi'`` where
            i is the index of the array in arrays.
        assubarray : bool
            Add the list of arrays as a single subarray field. If True, and
            names provided, names should be a string or a length-1 sequence.
            Default is False, in which case each array will be added as a
            separate field.

        Returns
        -------
        new_array : new instance of this array
            A copy of this array with the desired fields added.
        """
        newself = add_fields(self, arrays, names=names, assubarray=assubarray)
        self.__copy_attributes__(newself)
        return newself



[docs]
    def parse_boolargs(self, args):
        """Returns an array populated by given values, with the indices of
        those values dependent on given boolen tests on self.

        The given `args` should be a list of tuples, with the first element the
        return value and the second argument a string that evaluates to either
        True or False for each element in self.

        Each boolean argument is evaluated on elements for which every prior
        boolean argument was False. For example, if array `foo` has a field
        `bar`, and `args = [(1, 'bar < 10'), (2, 'bar < 20'), (3, 'bar < 30')]`,
        then the returned array will have `1`s at the indices for
        which `foo.bar < 10`, `2`s where `foo.bar < 20 and not foo.bar < 10`,
        and `3`s where `foo.bar < 30 and not (foo.bar < 10 or foo.bar < 20)`.

        The last argument in the list may have "else", an empty string, None,
        or simply list a return value. In any of these cases, any element not
        yet populated will be assigned the last return value.

        Parameters
        ----------
        args : {(list of) tuples, value}
            One or more return values and boolean argument determining where
            they should go.

        Returns
        -------
        return_values : array
            An array with length equal to self, with values populated with the
            return values.
        leftover_indices : array
            An array of indices that evaluated to False for all arguments.
            These indices will not have been popluated with any value,
            defaulting to whatever numpy uses for a zero for the return
            values' dtype. If there are no leftovers, an empty array is
            returned.

        Examples
        --------
        Given the following array:

        >>> arr = FieldArray(5, dtype=[('mtotal', float)])
        >>> arr['mtotal'] = numpy.array([3., 5., 2., 1., 4.])

        Return `"TaylorF2"` for all elements with `mtotal < 4` (note that the
        elements 1 and 4 are leftover):

        >>> arr.parse_boolargs(('TaylorF2', 'mtotal<4'))
            (array(['TaylorF2', '', 'TaylorF2', 'TaylorF2', ''],
            dtype='|S8'),
            array([1, 4]))

        Return `"TaylorF2"` for all elements with `mtotal < 4`,
        `"SEOBNR_ROM_DoubleSpin"` otherwise:

        >>> arr.parse_boolargs([('TaylorF2', 'mtotal<4'), ('SEOBNRv2_ROM_DoubleSpin', 'else')])
            (array(['TaylorF2', 'SEOBNRv2_ROM_DoubleSpin', 'TaylorF2', 'TaylorF2',
            'SEOBNRv2_ROM_DoubleSpin'],
            dtype='|S23'),
            array([], dtype=int64))

        The following will also return the same:

        >>> arr.parse_boolargs([('TaylorF2', 'mtotal<4'), ('SEOBNRv2_ROM_DoubleSpin',)])
        >>> arr.parse_boolargs([('TaylorF2', 'mtotal<4'), ('SEOBNRv2_ROM_DoubleSpin', '')])
        >>> arr.parse_boolargs([('TaylorF2', 'mtotal<4'), 'SEOBNRv2_ROM_DoubleSpin'])

        Return `"TaylorF2"` for all elements with `mtotal < 3`, `"IMRPhenomD"`
        for all elements with `3 <= mtotal < 4`, `"SEOBNRv2_ROM_DoubleSpin"`
        otherwise:

        >>> arr.parse_boolargs([('TaylorF2', 'mtotal<3'), ('IMRPhenomD', 'mtotal<4'), 'SEOBNRv2_ROM_DoubleSpin'])
            (array(['IMRPhenomD', 'SEOBNRv2_ROM_DoubleSpin', 'TaylorF2', 'TaylorF2',
            'SEOBNRv2_ROM_DoubleSpin'],
            dtype='|S23'),
            array([], dtype=int64))

        Just return `"TaylorF2"` for all elements:

        >>> arr.parse_boolargs('TaylorF2')
            (array(['TaylorF2', 'TaylorF2', 'TaylorF2', 'TaylorF2', 'TaylorF2'],
            dtype='|S8'),
            array([], dtype=int64))

        """
        if not isinstance(args, list):
            args = [args]
        # format the arguments
        return_vals = []
        bool_args = []
        for arg in args:
            if not isinstance(arg, tuple):
                return_val = arg
                bool_arg = None
            elif len(arg) == 1:
                return_val = arg[0]
                bool_arg = None
            elif len(arg) == 2:
                return_val, bool_arg = arg
            else:
                raise ValueError("argument not formatted correctly")
            return_vals.append(return_val)
            bool_args.append(bool_arg)
        # get the output dtype
        outdtype = numpy.array(return_vals).dtype
        out = numpy.zeros(self.size, dtype=outdtype)
        mask = numpy.zeros(self.size, dtype=bool)
        leftovers = numpy.ones(self.size, dtype=bool)
        for ii,(boolarg,val) in enumerate(zip(bool_args, return_vals)):
            if boolarg is None or boolarg == '' or boolarg.lower() == 'else':
                if ii+1 != len(bool_args):
                    raise ValueError("only the last item may not provide "
                        "any boolean arguments")
                mask = leftovers
            else:
                mask = leftovers & self[boolarg]
            out[mask] = val
            leftovers &= ~mask
        return out, numpy.where(leftovers)[0]



[docs]
    def append(self, other):
        """Appends another array to this array.

        The returned array will have all of the class methods and virutal
        fields of this array, including any that were added using `add_method`
        or `add_virtualfield`. If this array and other array have one or more
        string fields, the dtype for those fields are updated to a string
        length that can encompass the longest string in both arrays.

        .. note::
            Increasing the length of strings only works for fields, not
            sub-fields.

        Parameters
        ----------
        other : array
            The array to append values from. It must have the same fields and
            dtype as this array, modulo the length of strings. If the other
            array does not have the same dtype, a TypeError is raised.

        Returns
        -------
        array
            An array with others values appended to this array's values. The
            returned array is an instance of the same class as this array,
            including all methods and virtual fields.
        """
        try:
            return numpy.append(self, other).view(type=self.__class__)
        except TypeError:
            # see if the dtype error was due to string fields having different
            # lengths; if so, we'll make the joint field the larger of the
            # two
            str_fields = [name for name in self.fieldnames
                          if _isstring(self.dtype[name])]
            # get the larger of the two
            new_strlens = dict(
                [[name,
                  max(self.dtype[name].itemsize, other.dtype[name].itemsize)]
                 for name in str_fields]
            )
            # cast both to the new string lengths
            new_dt = []
            for dt in self.dtype.descr:
                name = dt[0]
                if name in new_strlens:
                    dt = (name, self.dtype[name].type, new_strlens[name])
                new_dt.append(dt)
            new_dt = numpy.dtype(new_dt)
            return numpy.append(
                self.astype(new_dt),
                other.astype(new_dt)
                ).view(type=self.__class__)



[docs]
    @classmethod
    def parse_parameters(cls, parameters, possible_fields):
        """Parses a list of parameters to get the list of fields needed in
        order to evaluate those parameters.

        Parameters
        ----------
        parameters : (list of) string(s)
            The list of desired parameters. These can be (functions of) fields
            or virtual fields.
        possible_fields : (list of) string(s)
            The list of possible fields.

        Returns
        -------
        list :
            The list of names of the fields that are needed in order to
            evaluate the given parameters.
        """
        if isinstance(possible_fields, str):
            possible_fields = [possible_fields]
        possible_fields = list(map(str, possible_fields))
        # we'll just use float as the dtype, as we just need this for names
        arr = cls(1, dtype=list(zip(possible_fields,
                                len(possible_fields)*[float])))
        # try to perserve order
        return list(get_needed_fieldnames(arr, parameters))



def _isstring(dtype):
    """Given a numpy dtype, determines whether it is a string. Returns True
    if the dtype is string or unicode.
    """
    return dtype.type == numpy.unicode_ or dtype.type == numpy.bytes_


def aliases_from_fields(fields):
    """Given a dictionary of fields, will return a dictionary mapping the
    aliases to the names.
    """
    return dict(c for c in fields if isinstance(c, tuple))


def fields_from_names(fields, names=None):
    """Given a dictionary of fields and a list of names, will return a
    dictionary consisting of the fields specified by names. Names can be
    either the names of fields, or their aliases.
    """

    if names is None:
        return fields
    if isinstance(names, str):
        names = [names]
    aliases_to_names = aliases_from_fields(fields)
    names_to_aliases = dict(zip(aliases_to_names.values(),
        aliases_to_names.keys()))
    outfields = {}
    for name in names:
        try:
            outfields[name] = fields[name]
        except KeyError:
            if name in aliases_to_names:
                key = (name, aliases_to_names[name])
            elif name in names_to_aliases:
                key = (names_to_aliases[name], name)
            else:
                raise KeyError('default fields has no field %s' % name)
            outfields[key] = fields[key]
    return outfields


#
# =============================================================================
#
#                           FieldArray with default fields
#
# =============================================================================
#

class _FieldArrayWithDefaults(FieldArray):
    """
    Subclasses FieldArray, adding class attribute ``_staticfields``, and
    class method ``default_fields``. The ``_staticfields`` should be a
    dictionary that defines some field names and corresponding dtype. The
    ``default_fields`` method returns a dictionary of the static fields
    and any default virtualfields that were added. A field array can then
    be initialized in one of 3 ways:

     1. With just a shape. In this case, the returned array will have all
     of the default fields.

     2. With a shape and a list of names, given by the ``names`` keyword
     argument. The names may be default fields, virtual fields, a method or
     property of the class, or any python function of these things. If a
     virtual field, method, or property is in the names, the needed underlying
     fields will be included in the return array. For example, if the class
     has a virtual field called 'mchirp', which is a function of fields called
     'mass1' and 'mass2', then 'mchirp' or any function of 'mchirp' may be
     included in the list of names (e.g., names=['mchirp**(5/6)']). If so, the
     returned array will have fields 'mass1' and 'mass2' even if these were
     not specified in names, so that 'mchirp' may be used without error.
     names must be names of either default fields or virtualfields, else a
     KeyError is raised.

     3. With a shape and a dtype. Any field specified by the dtype will be
     used. The fields need not be in the list of default fields, and/or the
     dtype can be different than that specified by the default fields.

    If additional fields are desired beyond the default fields, these can
    be specified using the ``additional_fields`` keyword argument; these should
    be provided in the same way as ``dtype``; i.e, as a list of (name, dtype)
    tuples.

    This class does not define any static fields, and ``default_fields`` just
    returns an empty dictionary. This class is mostly meant to be subclassed
    by other classes, so they can add their own defaults.
    """

    _staticfields = {}
    @classmethod
    def default_fields(cls, include_virtual=True, **kwargs):
        """The default fields and their dtypes. By default, this returns
        whatever the class's ``_staticfields`` and ``_virtualfields`` is set
        to as a dictionary of fieldname, dtype (the dtype of virtualfields is
        given by VIRTUALFIELD_DTYPE). This function should be overridden by
        subclasses to add dynamic fields; i.e., fields that require some input
        parameters at initialization. Keyword arguments can be passed to this
        to set such dynamic fields.
        """
        output = cls._staticfields.copy()
        if include_virtual:
            output.update({name: VIRTUALFIELD_DTYPE
                           for name in cls._virtualfields})
        return output

    def __new__(cls, shape, name=None, additional_fields=None,
                field_kwargs=None, **kwargs):
        """The ``additional_fields`` should be specified in the same way as
        ``dtype`` is normally given to FieldArray. The ``field_kwargs`` are
        passed to the class's default_fields method as keyword arguments.
        """
        if field_kwargs is None:
            field_kwargs = {}
        if 'names' in kwargs and 'dtype' in kwargs:
            raise ValueError("Please provide names or dtype, not both")
        default_fields = cls.default_fields(include_virtual=False,
            **field_kwargs)
        if 'names' in kwargs:
            names = kwargs.pop('names')
            if isinstance(names, str):
                names = [names]
            # evaluate the names to figure out what base fields are needed
            # to do this, we'll create a small default instance of self (since
            # no names are specified in the following initialization, this
            # block of code is skipped)
            arr = cls(1, field_kwargs=field_kwargs)
            # try to perserve order
            sortdict = dict([[nm, ii] for ii,nm in enumerate(names)])
            names = list(get_needed_fieldnames(arr, names))
            names.sort(key=lambda x: sortdict[x] if x in sortdict
                else len(names))
            # add the fields as the dtype argument for initializing
            kwargs['dtype'] = [(fld, default_fields[fld]) for fld in names]
        if 'dtype' not in kwargs:
            kwargs['dtype'] = list(default_fields.items())
        # add the additional fields
        if additional_fields is not None:
            if not isinstance(additional_fields, list):
                additional_fields = [additional_fields]
            if not isinstance(kwargs['dtype'], list):
                kwargs['dtype'] = [kwargs['dtype']]
            kwargs['dtype'] += additional_fields
        return super(_FieldArrayWithDefaults, cls).__new__(cls, shape,
            name=name, **kwargs)

    def add_default_fields(self, names, **kwargs):
        """
        Adds one or more empty default fields to self.

        Parameters
        ----------
        names : (list of) string(s)
            The names of the fields to add. Must be a field in self's default
            fields.

        Other keyword args are any arguments passed to self's default fields.

        Returns
        -------
        new array : instance of this array
            A copy of this array with the field added.
        """
        if isinstance(names, str):
            names = [names]
        default_fields = self.default_fields(include_virtual=False, **kwargs)
        # parse out any virtual fields
        arr = self.__class__(1, field_kwargs=kwargs)
        # try to perserve order
        sortdict = dict([[nm, ii] for ii,nm in enumerate(names)])
        names = list(get_needed_fieldnames(arr, names))
        names.sort(key=lambda x: sortdict[x] if x in sortdict
            else len(names))
        fields = [(name, default_fields[name]) for name in names]
        arrays = []
        names = []
        for name,dt in fields:
            arrays.append(default_empty(self.size, dtype=[(name, dt)]))
            names.append(name)
        return self.add_fields(arrays, names)

    @classmethod
    def parse_parameters(cls, parameters, possible_fields=None):
        """Parses a list of parameters to get the list of fields needed in
        order to evaluate those parameters.

        Parameters
        ----------
        parameters : (list of) strings
            The list of desired parameters. These can be (functions of) fields
            or virtual fields.
        possible_fields : {None, dict}
            Specify the list of possible fields. Must be a dictionary given
            the names, and dtype of each possible field. If None, will use this
            class's `_staticfields`.

        Returns
        -------
        list :
            The list of names of the fields that are needed in order to
            evaluate the given parameters.
        """
        if possible_fields is not None:
            # make sure field names are strings and not unicode
            possible_fields = dict([[f, dt]
                for f,dt in possible_fields.items()])
            class ModifiedArray(cls):
                _staticfields = possible_fields
            cls = ModifiedArray
        return cls(1, names=parameters).fieldnames

#
# =============================================================================
#
#                           WaveformArray
#
# =============================================================================
#


[docs]
class WaveformArray(_FieldArrayWithDefaults):
    """
    A FieldArray with some default fields and properties commonly used
    by CBC waveforms. This may be initialized in one of 3 ways:

    1. With just the size of the array. In this case, the returned array will
    have all of the default field names. Example:

    >>> warr = WaveformArray(10)
    >>> warr.fieldnames
        ('distance',
         'spin2x',
         'mass1',
         'mass2',
         'lambda1',
         'polarization',
         'spin2y',
         'spin2z',
         'spin1y',
         'spin1x',
         'spin1z',
         'inclination',
         'coa_phase',
         'dec',
         'tc',
         'lambda2',
         'ra')

    2. With some subset of the default field names. Example:

    >>> warr = WaveformArray(10, names=['mass1', 'mass2'])
    >>> warr.fieldnames
        ('mass1', 'mass2')

    The list of names may include virtual fields, and methods, as well as
    functions of these. If one or more virtual fields or methods are specified,
    the source code is analyzed to pull out whatever underlying fields are
    needed. Example:

    >>> warr = WaveformArray(10, names=['mchirp**(5/6)', 'chi_eff', 'cos(coa_phase)'])
    >>> warr.fieldnames
        ('spin2z', 'mass1', 'mass2', 'coa_phase', 'spin1z')

    3. By specifying a dtype. In this case, only the provided fields will
    be used, even if they are not in the defaults. Example:

    >>> warr = WaveformArray(10, dtype=[('foo', float)])
    >>> warr.fieldnames
        ('foo',)

    Additional fields can also be specified using the additional_fields
    keyword argument. Example:

    >>> warr = WaveformArray(10, names=['mass1', 'mass2'], additional_fields=[('bar', float)])
    >>> warr.fieldnames
        ('mass1', 'mass2', 'bar')

    .. note::
        If an array is initialized with all of the default fields (case 1,
        above), then the names come from waveform.parameters; i.e., they
        are actually Parameter instances, not just strings. This means that the
        field names carry all of the metadata that a Parameter has. For
        example:

        >>> warr = WaveformArray(10)
        >>> warr.fields[0]
            'distance'
        >>> warr.fields[0].description
            'Luminosity distance to the binary (in Mpc).'
        >>> warr.fields[0].label
            '$d_L$ (Mpc)'

    """
    _staticfields = (parameters.cbc_intrinsic_params +
                     parameters.extrinsic_params).dtype_dict

    _virtualfields = [
        parameters.mchirp, parameters.eta, parameters.mtotal,
        parameters.q, parameters.primary_mass, parameters.secondary_mass,
        parameters.chi_eff,
        parameters.spin_px, parameters.spin_py, parameters.spin_pz,
        parameters.spin_sx, parameters.spin_sy, parameters.spin_sz,
        parameters.spin1_a, parameters.spin1_azimuthal, parameters.spin1_polar,
        parameters.spin2_a, parameters.spin2_azimuthal, parameters.spin2_polar,
        parameters.remnant_mass]

    @property
    def primary_mass(self):
        """Returns the larger of self.mass1 and self.mass2."""
        return conversions.primary_mass(self.mass1, self.mass2)

    @property
    def secondary_mass(self):
        """Returns the smaller of self.mass1 and self.mass2."""
        return conversions.secondary_mass(self.mass1, self.mass)

    @property
    def mtotal(self):
        """Returns the total mass."""
        return conversions.mtotal_from_mass1_mass2(self.mass1, self.mass2)

    @property
    def q(self):
        """Returns the mass ratio m1/m2, where m1 >= m2."""
        return conversions.q_from_mass1_mass2(self.mass1, self.mass2)

    @property
    def eta(self):
        """Returns the symmetric mass ratio."""
        return conversions.eta_from_mass1_mass2(self.mass1, self.mass2)

    @property
    def mchirp(self):
        """Returns the chirp mass."""
        return conversions.mchirp_from_mass1_mass2(self.mass1, self.mass2)

    @property
    def chi_eff(self):
        """Returns the effective spin."""
        return conversions.chi_eff(self.mass1, self.mass2, self.spin1z,
                                   self.spin2z)

    @property
    def spin_px(self):
        """Returns the x-component of the spin of the primary mass."""
        return conversions.primary_spin(self.mass1, self.mass2, self.spin1x,
                                        self.spin2x)

    @property
    def spin_py(self):
        """Returns the y-component of the spin of the primary mass."""
        return conversions.primary_spin(self.mass1, self.mass2, self.spin1y,
                                        self.spin2y)

    @property
    def spin_pz(self):
        """Returns the z-component of the spin of the primary mass."""
        return conversions.primary_spin(self.mass1, self.mass2, self.spin1z,
                                        self.spin2z)

    @property
    def spin_sx(self):
        """Returns the x-component of the spin of the secondary mass."""
        return conversions.secondary_spin(self.mass1, self.mass2, self.spin1x,
                                        self.spin2x)

    @property
    def spin_sy(self):
        """Returns the y-component of the spin of the secondary mass."""
        return conversions.secondary_spin(self.mass1, self.mass2, self.spin1y,
                                        self.spin2y)

    @property
    def spin_sz(self):
        """Returns the z-component of the spin of the secondary mass."""
        return conversions.secondary_spin(self.mass1, self.mass2, self.spin1z,
                                        self.spin2z)

    @property
    def spin1_a(self):
        """Returns the dimensionless spin magnitude of mass 1."""
        return coordinates.cartesian_to_spherical_rho(
                                    self.spin1x, self.spin1y, self.spin1z)

    @property
    def spin1_azimuthal(self):
        """Returns the azimuthal spin angle of mass 1."""
        return coordinates.cartesian_to_spherical_azimuthal(
                                     self.spin1x, self.spin1y)

    @property
    def spin1_polar(self):
        """Returns the polar spin angle of mass 1."""
        return coordinates.cartesian_to_spherical_polar(
                                     self.spin1x, self.spin1y, self.spin1z)

    @property
    def spin2_a(self):
        """Returns the dimensionless spin magnitude of mass 2."""
        return coordinates.cartesian_to_spherical_rho(
                                    self.spin1x, self.spin1y, self.spin1z)

    @property
    def spin2_azimuthal(self):
        """Returns the azimuthal spin angle of mass 2."""
        return coordinates.cartesian_to_spherical_azimuthal(
                                     self.spin2x, self.spin2y)

    @property
    def spin2_polar(self):
        """Returns the polar spin angle of mass 2."""
        return coordinates.cartesian_to_spherical_polar(
                                     self.spin2x, self.spin2y, self.spin2z)

    @property
    def remnant_mass(self):
        """Returns the remnant mass for an NS-BH binary."""
        return conversions.remnant_mass_from_mass1_mass2_cartesian_spin_eos(
                                     self.mass1, self.mass2,
                                     spin1x=self.spin1x,
                                     spin1y=self.spin1y,
                                     spin1z=self.spin1z)



__all__ = ['FieldArray', 'WaveformArray']