Source code for nnsa.io.routines

"""
Code with some common routines.
"""
from functools import partial

from nnsa.annotations import Annotation

from nnsa.annotations.annotation_set import AnnotationSet
from nnsa.annotations.config import STANDARD_ANNOTATIONS
from nnsa.io.readers import EdfReader

__all__ = [
    'read_annotation_set_KL',
    'read_annotation_set_preterm',
]


def read_annotation_set_KL(filepath):
    """
    Helper function to read annotations from an EDF+ file as annotated by KL (using **).

    The annotations are read with EdfReader, wrapped in an nnsa AnnotationSet,
    filtered with '**' and passed through compute_nan_durations() using the
    total duration reported by the reader.

    Args:
        filepath (str): filepath of the EDF+.

    Returns:
        annotation_set (nnsa.AnnotationSet): object containing the annotations from KL.

    Raises:
        AssertionError: if the reader does not report the file as anonymized.
    """
    with EdfReader(filepath) as r:
        # Raise explicitly instead of using a bare ``assert``: assert statements are removed when
        # Python runs with -O, which would silently disable this check. The exception type is kept
        # as AssertionError so that existing callers see the same error as before.
        if not r.is_anonymized:
            raise AssertionError('EDF+ file is not anonymized: {}'.format(filepath))

        total_duration = r.additional_info['total_duration']

        # Read annotation set (KL used '**' to indicate her annotations).
        raw_annotations = r.read_annotations()

        # To nnsa AnnotationSet to get more functionality than the reader's own annotation class.
        annotation_set = AnnotationSet(annotations=raw_annotations)
        annotation_set = annotation_set.filter('**')
        annotation_set = annotation_set.compute_nan_durations(total_duration=total_duration)

    return annotation_set
def _row_to_annotation(row, include_dubious_qs, training_only):
    """Convert one row of the sleep label table into an Annotation (QS, dubious, QS + artefact or no label)."""
    # Onset and duration.
    onset = row['Start QS seconds']
    duration = row['Duration QS seconds']

    # Determine annotation text.
    if row['Real QS'] == 1:
        text = STANDARD_ANNOTATIONS['quiet_sleep']
    elif row['Dubious QS'] == 1:
        if include_dubious_qs:
            text = STANDARD_ANNOTATIONS['quiet_sleep']
        else:
            text = STANDARD_ANNOTATIONS['dubious']
    elif row['Artefact QS'] == 1:
        text = STANDARD_ANNOTATIONS['quiet_sleep'] + STANDARD_ANNOTATIONS['additional_artefact']
    else:
        raise NotImplementedError('Segment not marked as either Real, Dubious or Artefact QS.')

    # If training only is requested, set non-training segments to no_label.
    if training_only and row['Training'] != 1:
        text = STANDARD_ANNOTATIONS['no_label']

    return Annotation(onset, duration, text)


def _single_optional_value(values, name):
    """Return the only entry of `values`, or None if it is empty; more than one entry is not supported."""
    if len(values) == 0:
        return None
    if len(values) > 1:
        raise NotImplementedError('Expected at most 1 {} value per file. Got {}.'.format(name, len(values)))
    return values[0]


def read_annotation_set_preterm(df, filename, include_dubious_qs=False, training_only=False):
    """
    Helper function to read sleep labels from a dataframe for the preterm dataset.

    The input dataframe is not modified.

    Args:
        df (pd.DataFrame): dataframe with the contents of the xlsx file with the sleep labels
            (e.g. QS_labels_preterm_dataset.xlsx). The columns read are 'Filename',
            'Duration recording seconds', 'Start seconds', 'Stop seconds', 'Start QS seconds',
            'Duration QS seconds', 'Real QS', 'Dubious QS', 'Artefact QS' and (only if training_only
            is True) 'Training'.
        filename (str): the EDF filename to extract the sleep labels of.
            This EDF filename should be in the dataframe as column 'Filename'.
        include_dubious_qs (bool, optional): include the dubious QS segments (Dubious QS column in xlsx)
            as quiet sleep. If False, they are labeled as dubious.
            Defaults to False.
        training_only (bool, optional): use only the QS segments used for training (Training column in xlsx).
            Segments that are not marked for training are labeled as no_label.
            Defaults to False.

    Returns:
        annotation_set (nnsa.AnnotationSet): object containing the annotations.

    Raises:
        IndexError: if no row in df has the requested filename.
        NotImplementedError: if more than one 'Start seconds' or 'Stop seconds' value exists for the
            file, or if a row is not marked as Real, Dubious or Artefact QS.
    """
    # Select current patient data.
    df_pat = df[df['Filename'] == filename]
    if len(df_pat) == 0:
        # Same exception type as the out-of-bounds lookup below would give, but with a message that
        # tells the caller what is actually wrong.
        raise IndexError("No rows found for filename {!r} in column 'Filename'.".format(filename))

    # Total duration of recording.
    duration_recording = df_pat['Duration recording seconds'].iat[0]

    # Initialize annotations list.
    annotations = []

    # Check for unscored part at begin (runs from 0 to the 'Start seconds' value).
    unscored_duration = _single_optional_value(df_pat['Start seconds'].dropna().values, 'Start')
    if unscored_duration is not None:
        annotations.append(Annotation(0.0, unscored_duration, STANDARD_ANNOTATIONS['no_label']))

    # Check for unscored part at end (runs from the 'Stop seconds' value to the end of the recording).
    unscored_onset = _single_optional_value(df_pat['Stop seconds'].dropna().values, 'Stop')
    if unscored_onset is not None:
        annotations.append(Annotation(unscored_onset, duration_recording - unscored_onset,
                                      STANDARD_ANNOTATIONS['no_label']))

    # Convert rows to annotations ('QS', 'DUBIOUS', 'QS + ARTEFACT' or 'NL'). The annotations are
    # collected directly instead of being stored as a new column, since df_pat is a selection of the
    # caller's dataframe and assigning to it triggers pandas' SettingWithCopy warning.
    annotations.extend(
        _row_to_annotation(row, include_dubious_qs, training_only)
        for _, row in df_pat.iterrows()
    )

    # Collect annotations in an AnnotationSet.
    annotation_set = AnnotationSet(annotations, label='sleep')

    # Fill unlabeled periods with 'NQS' (also sorts the annotations). Note that the unscored parts are
    # already filled by 'no_label'.
    annotation_set.fill_unlabeled_periods(label_to_insert=STANDARD_ANNOTATIONS['non_quiet_sleep'],
                                          begin=0.0, end=duration_recording, inplace=True)

    return annotation_set