# Source code for nnsa.io.routines

"""
Code with some common routines.
"""
from functools import partial

from nnsa.annotations import Annotation

from nnsa.annotations.annotation_set import AnnotationSet
from nnsa.annotations.config import STANDARD_ANNOTATIONS
from nnsa.io.readers import EdfReader

# Names exported by a wildcard import of this module.
__all__ = [
    'read_annotation_set_KL',
    'read_annotation_set_preterm',
]


def read_annotation_set_KL(filepath):
    """
    Helper function to read annotations from an EDF+ file as annotated by KL (using **).

    The file is opened with EdfReader, its annotations are read, wrapped in an nnsa
    AnnotationSet, filtered on '**' and passed through ``compute_nan_durations`` with
    the total duration reported by the reader.

    Args:
        filepath (str): filepath of the EDF+.

    Returns:
        annotation_set (nnsa.AnnotationSet): object containing the annotations from KL.

    Raises:
        AssertionError: if the reader does not report the file as anonymized.
    """
    with EdfReader(filepath) as r:
        # Raise explicitly instead of using an `assert` statement: asserts are removed when
        # Python runs with -O, which would silently disable this anonymization check.
        # AssertionError is kept as the exception type for backward compatibility.
        if not r.is_anonymized:
            raise AssertionError('EDF+ file is not anonymized: {}'.format(filepath))

        total_duration = r.additional_info['total_duration']

        # Read all annotations from the file (KL's annotations are marked with '**').
        raw_annotations = r.read_annotations()

        # To nnsa AnnotationSet to get more functionality than the edfreadpy class.
        annotation_set = AnnotationSet(annotations=raw_annotations)\
            .filter('**')\
            .compute_nan_durations(total_duration=total_duration)

    return annotation_set


def read_annotation_set_preterm(df, filename, include_dubious_qs=False, training_only=False):
    """
    Helper function to read sleep labels from a dataframe for the preterm dataset.

    Args:
        df (pd.DataFrame): dataframe with the contents of the xlsx file with the sleep labels
            (e.g. QS_labels_preterm_dataset.xlsx). The dataframe is not modified.
        filename (str): the EDF filename to extract the sleep labels of. This EDF filename should
            be in the dataframe as column 'Filename'.
        include_dubious_qs (bool, optional): include the dubious QS segments (Dubious QS column in xlsx).
            Defaults to False.
        training_only (bool, optional): use only the QS segments used for training (Training column in xlsx).
            Defaults to False.

    Returns:
        annotation_set (nnsa.AnnotationSet): object containing the annotations.

    Raises:
        IndexError: if `filename` does not occur in the 'Filename' column of `df`.
        NotImplementedError: if a segment is not marked as Real, Dubious or Artefact QS, or if more
            than one 'Start seconds' or 'Stop seconds' value is present for the file.
    """
    def to_annotation(row):
        # Build one Annotation from a dataframe row; the flags are taken from the enclosing scope.
        # Onset and duration.
        onset = row['Start QS seconds']
        duration = row['Duration QS seconds']

        # Determine annotation text.
        if row['Real QS'] == 1:
            text = STANDARD_ANNOTATIONS['quiet_sleep']
        elif row['Dubious QS'] == 1:
            if include_dubious_qs:
                text = STANDARD_ANNOTATIONS['quiet_sleep']
            else:
                text = STANDARD_ANNOTATIONS['dubious']
        elif row['Artefact QS'] == 1:
            text = STANDARD_ANNOTATIONS['quiet_sleep'] + STANDARD_ANNOTATIONS['additional_artefact']
        else:
            raise NotImplementedError('Segment not marked as either Real, Dubious or Artefact QS.')

        # If training only is requested, set non-training segments to no_label.
        if training_only and row['Training'] != 1:
            text = STANDARD_ANNOTATIONS['no_label']

        return Annotation(onset, duration, text)

    # Select current patient data.
    df_pat = df[df['Filename'] == filename]
    if df_pat.empty:
        # Same exception type as the positional lookup below would raise on an empty selection,
        # but with a message that names the actual problem.
        raise IndexError("No rows found for filename '{}' in column 'Filename'.".format(filename))

    # Total duration of recording.
    duration_recording = df_pat['Duration recording seconds'].iat[0]

    # Initialize annotations list.
    annotations = []

    # Check for unscored part at begin.
    unscored_duration = df_pat['Start seconds'].dropna().values
    if len(unscored_duration) > 1:
        raise NotImplementedError('Expected at most 1 Start value per file. Got {}.'.format(len(unscored_duration)))
    if len(unscored_duration) == 1:
        annotations.append(Annotation(0.0, unscored_duration[0], STANDARD_ANNOTATIONS['no_label']))

    # Check for unscored part at end.
    unscored_onset = df_pat['Stop seconds'].dropna().values
    if len(unscored_onset) > 1:
        raise NotImplementedError('Expected at most 1 Stop value per file. Got {}.'.format(len(unscored_onset)))
    if len(unscored_onset) == 1:
        onset = unscored_onset[0]
        annotations.append(Annotation(onset, duration_recording - onset, STANDARD_ANNOTATIONS['no_label']))

    # Convert rows to annotations ('QS', 'DUBIOUS', 'QS + ARTEFACT' or 'NL').
    # The result is kept in a separate Series instead of being written back as a column of
    # `df_pat`: `df_pat` is a selection of `df`, and assigning a column to it triggers pandas'
    # SettingWithCopyWarning.
    row_annotations = df_pat.apply(to_annotation, axis=1)

    # Collect annotations in an AnnotationSet.
    annotations.extend(row_annotations.values.tolist())
    annotation_set = AnnotationSet(annotations, label='sleep')

    # Fill unlabeled periods with 'NQS' (also sorts the annotations). Note that the unscored parts are already filled
    # by 'no_label'.
    annotation_set.fill_unlabeled_periods(label_to_insert=STANDARD_ANNOTATIONS['non_quiet_sleep'],
                                          begin=0.0,
                                          end=duration_recording,
                                          inplace=True)

    return annotation_set