Source code for nnsa.annotations.sleep_stages

"""
Code related to handling annotations containing sleep stages.
"""
from nnsa.annotations.config import STANDARD_ANNOTATIONS, LABEL_ANNOTATIONS_MAPPING, SLEEP_LABELS

# Names exported by `from nnsa.annotations.sleep_stages import *`. The helper predicates (is_* functions) and the
# text-matching utilities defined further down are intentionally not listed here.
__all__ = [
    'annotation_to_label',
    'clean_up_text',
    'standardize_annotation',
]


def annotation_to_label(text, class_labels, include_labels_with_artefacts=True):
    """
    Convert a standard annotation text to a sleep stage label.

    To control the type of classes for classification of text, see the class_labels argument.

    LABEL_ANNOTATIONS_MAPPING maps standard annotation texts to a standard label.
    This function finds the (standard) sleep label in `class_labels` which matches the standard annotation in `text`.
    If the text matches with multiple labels, the first label it matches with is returned.
    Texts that can not be labeled as any of the specified labels are labeled as no_label.

    The list passed as `class_labels` is not modified by this function.

    Args:
        text (str): standard annotation text to convert to a label, see STANDARD_ANNOTATIONS.
        class_labels (list): a list with labels for the classification of text, see SLEEP_LABELS and
            LABEL_ANNOTATIONS_MAPPING.
        include_labels_with_artefacts (bool): if True, texts with a sleep label and indicating an artefact are
            included. If False, any text indicating an artefact is excluded.
            Defaults to True.

    Returns:
        label (str): the label corresponding to text (one of the specified labels or NL (no_label)).

    Raises:
        TypeError: if `class_labels` is not a list.
        ValueError: if a label in `class_labels` is not a key of LABEL_ANNOTATIONS_MAPPING.
    """
    if not isinstance(class_labels, list):
        raise TypeError('Argument `class_labels` must be a list. Got type {}.'.format(type(class_labels)))

    artefact_label = SLEEP_LABELS['artefact']
    additional_artefact = STANDARD_ANNOTATIONS['additional_artefact']

    # Work on a copy so that reordering the labels does not leak into the caller's list.
    ordered_labels = list(class_labels)
    if include_labels_with_artefacts and artefact_label in ordered_labels:
        # Put artefact label at the end of the list, so it is considered last.
        ordered_labels.remove(artefact_label)
        ordered_labels.append(artefact_label)

    # Default label (when no suitable label found): no_label.
    label = SLEEP_LABELS['no_label']

    # Find the first label in ordered_labels for which the text classifies.
    for lab in ordered_labels:
        # Check label input.
        if lab not in LABEL_ANNOTATIONS_MAPPING:
            raise ValueError('Invalid label "{}". Choose from {}.'.format(lab, list(LABEL_ANNOTATIONS_MAPPING.keys())))

        valid_texts = list(LABEL_ANNOTATIONS_MAPPING[lab])
        if include_labels_with_artefacts:
            # Also accept every valid text when it carries the additional artefact suffix.
            valid_texts.extend([valid_text + additional_artefact for valid_text in valid_texts])

        if text in valid_texts:
            label = lab
            break

        # Artefacts receive special treatment: standard text with additional artefact belong to artefact label (but
        # only after checking all other labels accepting additional artefacts if include_labels_with_artefacts is
        # True, therefore artefact label was put at the end of the list, and only now we check for additional
        # artefact).
        if lab == artefact_label and additional_artefact in text:
            label = lab
            break

    return label
def clean_up_text(text):
    """
    Strip redundant decoration from a raw annotation text.

    The cleaning steps are applied in this order:
    1. a leading 'Note : ' prefix is dropped;
    2. leading and trailing '*' characters are removed;
    3. leading and trailing white space is removed.

    Args:
        text (str): annotation text.

    Returns:
        clean_text (str): cleaned up annotation text.
    """
    note_prefix = 'Note : '

    # Drop the prefix only when the text literally starts with it.
    body = text[len(note_prefix):] if text.startswith(note_prefix) else text

    # Asterisks are stripped first, white space second (the order matters for texts like ' *x* ').
    return body.strip('*').strip()
def standardize_annotation(text):
    """
    Map a free-form annotation text onto a standard annotation (one of STANDARD_ANNOTATIONS).

    The text is tested against a fixed, ordered series of predicates; the first predicate that matches determines
    the standard annotation. Start and stop markers are mapped to the literal texts 'START' and 'STOP'.
    If the resulting annotation is not itself an artefact, blinking or movement annotation, but the text also
    mentions one of those, the additional artefact suffix is appended.

    Args:
        text (str): annotation text to convert.

    Returns:
        std_text (str): standard annotation corresponding to the annotation text.

    Raises:
        NotImplementedError: if the annotation text cannot be automatically recognized.
    """
    # Ordered (predicate, STANDARD_ANNOTATIONS key) pairs. Order matters: the first match wins.
    ordered_checks = (
        (is_undetermined, 'undetermined'),
        (is_indeterminate_sleep, 'indeterminate_sleep'),
        (is_transitional_sleep, 'transitional_sleep'),
        (is_dubious, 'dubious'),
        (is_non_quiet_sleep, 'non_quiet_sleep'),
        (is_quiet_sleep_hvs_ta, 'quiet_sleep_hvs_ta'),
        (is_quiet_sleep_ta, 'quiet_sleep_ta'),
        (is_quiet_sleep_hvs, 'quiet_sleep_hvs'),
        (is_quiet_sleep, 'quiet_sleep'),
        (is_active_sleep_1, 'active_sleep_1'),
        (is_active_sleep_2, 'active_sleep_2'),
        (is_active_sleep, 'active_sleep'),
        (is_wake, 'wake'),
        (is_artefact, 'artefact'),
        (is_blinking, 'blinking'),
        (is_movement, 'movement'),
        (is_no_label, 'no_label'),
    )

    for predicate, key in ordered_checks:
        if predicate(text):
            std_text = STANDARD_ANNOTATIONS[key]
            break
    else:
        # None of the standard annotations matched: try the scoring start/stop markers.
        if is_start(text):
            std_text = 'START'
        elif is_stop(text):
            std_text = 'STOP'
        else:
            raise NotImplementedError('Cannot automatically standardize annotation with text: "{}".'.format(text))

    # Annotations that already are an artefact type never get the additional artefact suffix.
    artefact_annotations = [
        STANDARD_ANNOTATIONS['artefact'],
        STANDARD_ANNOTATIONS['blinking'],
        STANDARD_ANNOTATIONS['movement']
    ]
    if std_text in artefact_annotations:
        return std_text

    # If artefact is present additionally, add additional artefact to text.
    if is_artefact(text) or is_blinking(text) or is_movement(text):
        std_text += STANDARD_ANNOTATIONS['additional_artefact']

    return std_text
def word_in_text(word, text):
    """
    Check whether `word` is in `text` as a single word (so not just a part of another word).

    An occurrence counts as a single word when the characters directly before and after it (if any) are not
    alphabetic (see str.isalpha). Digits, white space and punctuation therefore act as word boundaries.
    Case sensitive. An empty `word` is never found.

    Args:
        word (str): word to look for in text.
        text (str): the text to search.

    Returns:
        found (bool): True if word is in text, False if not.
    """
    if not word:
        # An empty string matches at every position, which would make the search below meaningless.
        return False

    word_length = len(word)
    idx = text.find(word)
    while idx != -1:
        end = idx + word_length

        # Check the neighbours in the full text (not in a slice), so that a candidate directly following a
        # rejected occurrence is still judged against its real previous character.
        valid_previous_char = idx == 0 or not text[idx - 1].isalpha()
        valid_next_char = end == len(text) or not text[end].isalpha()
        if valid_previous_char and valid_next_char:
            return True

        # Continue searching one position further.
        idx = text.find(word, idx + 1)

    return False


def pattern_or_word_in_text(patterns, words, text):
    """
    Check whether any pattern or word is in `text`.

    Patterns are not case sensitive and ignore any white spaces and can be part of a (longer) word.
    Words are case sensitive and they cannot be part of a (longer) word.

    Args:
        patterns (list): list of patterns to look for.
        words (list): list of words to look for in text.
        text (str): the text to search.

    Returns:
        (bool): True if any pattern or word is in text, False if not.

    Raises:
        TypeError: if patterns or words is not a list.
    """
    if not isinstance(patterns, list):
        raise TypeError('Argument `patterns` must be a list. Got type(patterns)={}.'.format(type(patterns)))

    if not isinstance(words, list):
        raise TypeError('Argument `words` must be a list. Got type(words)={}.'.format(type(words)))

    # Convert to lower case and remove spaces once, then look for the (equally normalized) patterns.
    normalized_text = text.lower().replace(' ', '')
    if any(p.lower().replace(' ', '') in normalized_text for p in patterns):
        return True

    # Find words.
    return any(word_in_text(w, text) for w in words)


# To standardize the annotations, we have to look for patterns and words in the annotation text.
# Patterns are not case sensitive and ignore any white spaces and can be part of a (longer) word.
# Words are case sensitive and they cannot be part of a (longer) word.
def is_non_quiet_sleep(text):
    """Return True if `text` contains a non-quiet sleep pattern or word."""
    patterns = ['nqs']
    words = [SLEEP_LABELS['non_quiet_sleep']]
    return pattern_or_word_in_text(patterns, words, text)


def is_quiet_sleep(text):
    """Return True if `text` contains a quiet sleep pattern or word."""
    patterns = ['qs']
    words = [SLEEP_LABELS['quiet_sleep']]
    return pattern_or_word_in_text(patterns, words, text)


def is_quiet_sleep_ta(text):
    """Return True if `text` contains a quiet sleep TA pattern or word."""
    patterns = ['qs ta', 'qs hvs/ta', 'qs hvs+ta']
    words = [SLEEP_LABELS['quiet_sleep_ta'], 'TA']
    return pattern_or_word_in_text(patterns, words, text)


def is_quiet_sleep_hvs(text):
    """Return True if `text` contains a quiet sleep HVS pattern or word."""
    patterns = ['qs hvs']
    words = [SLEEP_LABELS['quiet_sleep_hvs'], 'HVS']
    return pattern_or_word_in_text(patterns, words, text)


def is_quiet_sleep_hvs_ta(text):
    """Return True if `text` contains a combined HVS/TA pattern."""
    patterns = ['ta/hvs', 'ta+hvs', 'hvs/ta', 'hvs+ta']
    words = []
    return pattern_or_word_in_text(patterns, words, text)


def is_active_sleep(text):
    """Return True if `text` contains an active sleep word."""
    patterns = []
    words = ['AS', SLEEP_LABELS['active_sleep']]
    return pattern_or_word_in_text(patterns, words, text)


def is_active_sleep_1(text):
    """Return True if `text` contains an active sleep 1 pattern or word."""
    patterns = ['as 1']
    words = [SLEEP_LABELS['active_sleep_1'], 'ASI']
    return pattern_or_word_in_text(patterns, words, text)


def is_active_sleep_2(text):
    """Return True if `text` contains an active sleep 2 pattern or word."""
    patterns = ['as 2']
    words = [SLEEP_LABELS['active_sleep_2'], 'ASII', 'LVI']
    return pattern_or_word_in_text(patterns, words, text)


def is_wake(text):
    """Return True if `text` contains a wake pattern or word."""
    patterns = ['wake']
    words = [SLEEP_LABELS['wake']]
    return pattern_or_word_in_text(patterns, words, text)


def is_artefact(text):
    """Return True if `text` contains an artefact pattern (either spelling) or word."""
    patterns = ['artefact', 'artifact']
    words = [SLEEP_LABELS['artefact']]
    return pattern_or_word_in_text(patterns, words, text)


def is_blinking(text):
    """Return True if `text` contains the blinking pattern."""
    patterns = ['blinking']
    words = []
    return pattern_or_word_in_text(patterns, words, text)


def is_movement(text):
    """Return True if `text` contains the movement pattern."""
    patterns = ['movement']
    words = []
    return pattern_or_word_in_text(patterns, words, text)


def is_no_label(text):
    """Return True if `text` contains one of the words indicating the absence of a label."""
    patterns = []
    words = ['None', 'none', 'Unlabeled']
    return pattern_or_word_in_text(patterns, words, text)


def is_dubious(text):
    """Return True if `text` contains a question mark or the dubious pattern."""
    patterns = ['?', 'dubious']
    words = []
    return pattern_or_word_in_text(patterns, words, text)


def is_undetermined(text):
    """Return True if `text` contains an undetermined/uncertain pattern (including a common misspelling)."""
    patterns = ['undetermined', 'undertermined', 'uncertain']
    words = []
    return pattern_or_word_in_text(patterns, words, text)


def is_indeterminate_sleep(text):
    """Return True if `text` contains an indeterminate sleep pattern or word."""
    patterns = ['indeterminate']
    words = [SLEEP_LABELS['indeterminate_sleep']]
    return pattern_or_word_in_text(patterns, words, text)


def is_transitional_sleep(text):
    """Return True if `text` contains a transitional (or intermediate) sleep pattern or word."""
    patterns = ['intermediate', 'transitional']
    words = [SLEEP_LABELS['transitional_sleep']]
    return pattern_or_word_in_text(patterns, words, text)


def is_start(text):
    """Return True if `text` contains the start-of-scoring pattern."""
    patterns = ['start sleep scoring']
    words = []
    return pattern_or_word_in_text(patterns, words, text)


def is_stop(text):
    """Return True if `text` contains the stop-of-scoring pattern."""
    patterns = ['stop scoring']
    words = []
    return pattern_or_word_in_text(patterns, words, text)