"""
Code with some common routines.
"""
from functools import partial
from nnsa.annotations import Annotation
from nnsa.annotations.annotation_set import AnnotationSet
from nnsa.annotations.config import STANDARD_ANNOTATIONS
from nnsa.io.readers import EdfReader
# Public API of this module: the names exported by `from <module> import *`.
__all__ = [
'read_annotation_set_KL',
'read_annotation_set_preterm',
]
def read_annotation_set_KL(filepath):
    """
    Helper function to read annotations from an EDF+ file as annotated by KL (using **).

    Args:
        filepath (str): filepath of the EDF+.

    Returns:
        annotation_set (nnsa.AnnotationSet): object containing the annotations from KL.

    Raises:
        AssertionError: if the reader does not report the file as anonymized.
    """
    with EdfReader(filepath) as r:
        # Raise explicitly instead of using an `assert` statement: asserts are removed when Python runs with -O,
        # which would silently disable this check. AssertionError is kept as exception type for existing callers.
        if not r.is_anonymized:
            raise AssertionError('EDF+ file is not anonymized: {}'.format(filepath))

        # Total duration of the recording, needed to compute missing annotation durations below.
        total_duration = r.additional_info['total_duration']

        # Read annotation set (used '**' to indicate her annotations).
        annotation_set = r.read_annotations()

        # To nnsa AnnotationSet to get more functionality than the edfreadpy class.
        # Keep only the '**' annotations and compute the durations that are not specified.
        annotation_set = AnnotationSet(annotations=annotation_set) \
            .filter('**') \
            .compute_nan_durations(total_duration=total_duration)

    return annotation_set
def read_annotation_set_preterm(df, filename, include_dubious_qs=False, training_only=False):
    """
    Helper function to read sleep labels from a dataframe for the preterm dataset.

    The following columns of the dataframe are read: 'Filename', 'Duration recording seconds', 'Start seconds',
    'Stop seconds', 'Start QS seconds', 'Duration QS seconds', 'Real QS', 'Dubious QS', 'Artefact QS' and
    (only if `training_only` is True) 'Training'. The dataframe itself is not modified.

    Args:
        df (pd.DataFrame): dataframe with the contents of the xlsx file with the sleep labels
            (e.g. QS_labels_preterm_dataset.xlsx).
        filename (str): the EDF filename to extract the sleep labels of. This EDF filename should
            be in the dataframe as column 'Filename'.
        include_dubious_qs (bool, optional): include the dubious QS segments (Dubious QS column in xlsx).
            Defaults to False.
        training_only (bool, optional): use only the QS segments used for training (Training column in xlsx).
            Defaults to False.

    Returns:
        annotation_set (nnsa.AnnotationSet): object containing the annotations.

    Raises:
        IndexError: if no row in `df` has `filename` in its 'Filename' column.
        NotImplementedError: if a row is not marked as Real, Dubious or Artefact QS, or if more than one
            'Start seconds' or 'Stop seconds' value is found for the file.
    """
    def to_annotation(row):
        """Convert one row of the dataframe to an Annotation ('QS', 'DUBIOUS', 'QS + ARTEFACT' or 'NL')."""
        # Onset and duration.
        onset = row['Start QS seconds']
        duration = row['Duration QS seconds']

        # Determine annotation text.
        if row['Real QS'] == 1:
            text = STANDARD_ANNOTATIONS['quiet_sleep']
        elif row['Dubious QS'] == 1:
            if include_dubious_qs:
                text = STANDARD_ANNOTATIONS['quiet_sleep']
            else:
                text = STANDARD_ANNOTATIONS['dubious']
        elif row['Artefact QS'] == 1:
            text = STANDARD_ANNOTATIONS['quiet_sleep'] + STANDARD_ANNOTATIONS['additional_artefact']
        else:
            raise NotImplementedError('Segment not marked as either Real, Dubious or Artefact QS.')

        # If training only is requested, set non-training segments to no_label.
        if training_only and row['Training'] != 1:
            text = STANDARD_ANNOTATIONS['no_label']

        return Annotation(onset, duration, text)

    def single_value_or_none(column, name):
        """Return the single non-NaN value in `column` of the current file, or None if there is none."""
        values = df_pat[column].dropna().values
        if len(values) == 0:
            return None
        if len(values) > 1:
            raise NotImplementedError('Expected at most 1 {} value per file. Got {}.'.format(name, len(values)))
        return values[0]

    # Select current patient data.
    df_pat = df[df['Filename'] == filename]
    if len(df_pat) == 0:
        # Fail with a clear message instead of the out-of-bounds error that `.iat[0]` would give below.
        raise IndexError("No rows found for filename {!r} in column 'Filename'.".format(filename))

    # Total duration of recording.
    duration_recording = df_pat['Duration recording seconds'].iat[0]

    # Initialize annotations list.
    annotations = []

    # Check for unscored part at begin: runs from 0 up to the 'Start seconds' value.
    unscored_duration = single_value_or_none('Start seconds', 'Start')
    if unscored_duration is not None:
        annotations.append(Annotation(0.0, unscored_duration, STANDARD_ANNOTATIONS['no_label']))

    # Check for unscored part at end: runs from the 'Stop seconds' value up to the end of the recording.
    unscored_onset = single_value_or_none('Stop seconds', 'Stop')
    if unscored_onset is not None:
        annotations.append(Annotation(unscored_onset, duration_recording - unscored_onset,
                                      STANDARD_ANNOTATIONS['no_label']))

    # Convert rows to annotations ('QS', 'DUBIOUS', 'QS + ARTEFACT' or 'NL').
    # Iterate over the rows directly instead of storing the result as a new column: df_pat is a selection of df,
    # so assigning a column to it triggers pandas' SettingWithCopyWarning.
    annotations.extend(to_annotation(row) for _, row in df_pat.iterrows())

    # Collect annotations in an AnnotationSet.
    annotation_set = AnnotationSet(annotations, label='sleep')

    # Fill unlabeled periods with 'NQS' (also sorts the annotations). Note that the unscored parts are already filled
    # by 'no_label'.
    annotation_set.fill_unlabeled_periods(label_to_insert=STANDARD_ANNOTATIONS['non_quiet_sleep'],
                                          begin=0.0,
                                          end=duration_recording,
                                          inplace=True)

    return annotation_set