import logging
import os
import pathlib
import datetime
import numpy
from lal import gpstime as lalgps
# Module-level logger, registered under the name 'pycbc.io.live'
logger = logging.getLogger('pycbc.io.live')
def maximum_string(numbers):
    """
    Find the longest leading digit string shared by a set of integers.

    Every number is written out in decimal, left-padded with zeros to the
    width of the largest value, and the characters common to the start of
    all of those strings are returned.

    Parameters
    ----------
    numbers : list of integers
        A list of integers (expected to be non-negative) from which to
        determine the longest common string prefix.
        E.g. 12345, 12346, 12356 returns '123'

    Returns
    -------
    str
        The shared leading digits. This is an empty string when the
        padded numbers already differ in their first character.

    Raises
    ------
    ValueError
        If ``numbers`` is empty (raised by ``max``).
    """
    # Width of the largest number. Counting the characters of its decimal
    # form is exact, whereas ceil(log10(n)) comes out one digit short for
    # exact powers of ten (1000 -> 3) and fails outright for 0.
    maxlen = len(str(max(numbers)))

    # Convert the numbers to (possibly leading zero-padded) strings
    strings = [f"{n:0{maxlen}d}" for n in numbers]

    # commonprefix compares character by character, which is exactly the
    # digit-wise comparison needed here.
    return os.path.commonprefix(strings)
def filter_file(filename, start_time, end_time):
    """
    Decide from a trigger file's name whether it overlaps a time window.

    Parameters
    ----------
    filename : string
        Filename which matches the format
        {id_string}-{start_time}-{duration}.hdf
    start_time : float
        Start of search window; a file ending before this GPS time is
        rejected
    end_time : float
        End of search window; a file starting after this GPS time is
        rejected

    Returns
    -------
    boolean
        Whether any part of the file lies within the start/end times
    """
    # FIX ME eventually - the GPS start and duration are read out of the
    # filename. Is there a better way? (i.e. trigger gps times in the
    # file or add an attribute)
    name_tail = filename.split('-')[-2:]
    file_start = float(name_tail[0])
    # The last field carries the four-character '.hdf' suffix; drop it
    duration = float(name_tail[1][:-4])

    file_end = file_start + duration
    ends_in_or_after_window = file_end >= start_time
    return ends_in_or_after_window and (file_start <= end_time)
def add_live_trigger_selection_options(parser):
    """
    Register the command-line options used to select the right set of
    PyCBC live trigger files on an argument parser.

    All options are placed in a dedicated 'Trigger Finding' argument group.

    Parameters
    ----------
    parser : argparse-style parser
        Object providing ``add_argument_group``; it is modified in place.
    """
    trigger_group = parser.add_argument_group('Trigger Finding')
    date_format_default = "%Y_%m_%d"

    # (flag, keyword arguments) pairs; they are registered in this order
    option_table = [
        ("--trigger-directory", {
            "metavar": "PATH",
            "required": True,
            "help": "Directory containing trigger files, directory "
                    "can contain subdirectories. Required.",
        }),
        ("--gps-start-time", {
            "type": int,
            "required": True,
            "help": "Start time of the analysis. Integer, required",
        }),
        ("--gps-end-time", {
            "type": int,
            "required": True,
            "help": "End time of the analysis. Integer, required",
        }),
        ("--date-directories", {
            "action": "store_true",
            "help": "Indicate if the trigger files are stored in "
                    "directories by date.",
        }),
        ("--date-directory-format", {
            "default": date_format_default,
            # Percent signs are doubled because argparse %-formats help text
            "help": "Format of date, see datetime strftime "
                    "documentation for details. Default: "
                    "%%Y_%%m_%%d",
        }),
        ("--file-identifier", {
            "default": "H1L1V1-Live",
            "help": "String required in filename to be considered for "
                    "analysis. Default: 'H1L1V1-Live'.",
        }),
    ]
    for flag, settings in option_table:
        trigger_group.add_argument(flag, **settings)
def find_trigger_files(directory, gps_start_time, gps_end_time,
                       id_string='*', date_directories=False,
                       date_directory_format="%Y_%m_%d"):
    """
    Find a list of PyCBC live trigger files which are between the gps
    start and end times given

    Parameters
    ----------
    directory : string
        Directory to search; subdirectories are searched recursively.
    gps_start_time : int
        GPS time of the start of the search window.
    gps_end_time : int
        GPS time of the end of the search window.
    id_string : string, optional
        Text which must appear in the filename. Default '*', which
        places no restriction on the name.
    date_directories : bool, optional
        If True, only search the subdirectories of ``directory`` named
        after the dates spanned by the window, plus one day either side.
        Default False.
    date_directory_format : string, optional
        strftime format used to turn a date into a subdirectory name.
        Default "%Y_%m_%d".

    Returns
    -------
    list of strings
        Sorted paths (POSIX form) of the .hdf files whose filename start
        time and duration overlap the search window.
    """
    # Find the string at the start of the gps time which will match all
    # files in this range - this helps to cut which ones we need to
    # compare later
    num_match = maximum_string([gps_start_time, gps_end_time])

    # ** means recursive, so for large directories, this is expensive.
    # It is not too bad if date_directories is set, as we don't waste time
    # in directories where there can't be any files.
    glob_string = f'**/*{id_string}*{num_match}*.hdf'

    matching_files = []
    if date_directories:
        # Convert the GPS times into dates, and only use the directories
        # of those dates to search.
        # Add a day on either side to ensure we get files which straddle
        # the boundary
        one_day = datetime.timedelta(days=1)
        date_check = lalgps.gps_to_utc(gps_start_time) - one_day
        last_date = (lalgps.gps_to_utc(gps_end_time) + one_day).date()

        # Compare calendar dates, not full datetimes: date_check keeps the
        # start's time of day, so a datetime comparison would drop the
        # trailing padding day whenever the end's time of day is not later
        # than the start's.
        while date_check.date() <= last_date:
            date_dir = date_check.strftime(date_directory_format)
            subdir = os.path.join(directory, date_dir)
            matching_files.extend(
                f.as_posix() for f in pathlib.Path(subdir).glob(glob_string)
            )
            date_check += one_day
    else:
        # Grab all matching hdf files anywhere below the directory
        matching_files.extend(
            f.as_posix() for f in pathlib.Path(directory).glob(glob_string)
        )

    # The glob is only a coarse pre-selection; keep the files whose
    # filename says they overlap the time window
    matching_files = [f for f in matching_files
                      if filter_file(f, gps_start_time, gps_end_time)]

    return sorted(matching_files)
def find_trigger_files_from_cli(args):
    """
    Call find_trigger_files using parsed command-line options.

    Parameters
    ----------
    args : namespace
        Parsed options carrying the attributes defined by
        add_live_trigger_selection_options: trigger_directory,
        gps_start_time, gps_end_time, file_identifier, date_directories
        and date_directory_format.

    Returns
    -------
    The value returned by find_trigger_files for those options.
    """
    # Positional part of the call: where to look and the GPS window
    search_dir = args.trigger_directory
    window_start = args.gps_start_time
    window_end = args.gps_end_time

    # Optional selection settings, passed on by keyword
    selection = {
        'id_string': args.file_identifier,
        'date_directories': args.date_directories,
        'date_directory_format': args.date_directory_format,
    }
    return find_trigger_files(search_dir, window_start, window_end,
                              **selection)
# Public API of this module: the names exported by a wildcard import
__all__ = [
'add_live_trigger_selection_options',
'find_trigger_files',
'find_trigger_files_from_cli',
]