# Source code for nnsa.annotations.sleep_stages

"""
Code related to handling annotations containing sleep stages.
"""
from nnsa.annotations.config import STANDARD_ANNOTATIONS, LABEL_ANNOTATIONS_MAPPING, SLEEP_LABELS

# Names exported by `from nnsa.annotations.sleep_stages import *`.
# Functions defined in this module that are not listed here are not exported by a star-import.
__all__ = [
    'annotation_to_label',
    'clean_up_text',
    'standardize_annotation',
]


def annotation_to_label(text, class_labels, include_labels_with_artefacts=True):
    """
    Convert a standard annotation text to a sleep stage label.

    To control the type of classes for classification of text, see the class_labels argument.

    LABEL_ANNOTATIONS_MAPPING maps standard annotation texts to a standard label. This function finds the (standard)
    sleep label in `class_labels` which matches the standard annotation in `text`.
    If the text matches with multiple labels, the first label it matches with is returned.
    Texts that can not be labeled as any of the specified labels are labeled as no_label.

    The list passed as `class_labels` is never modified: the reordering of the artefact label (see below) is done
    on an internal copy.

    Args:
        text (str): standard annotation text to convert to a label, see STANDARD_ANNOTATIONS.
        class_labels (list): a list with labels for the classification of text, see SLEEP_LABELS and
            LABEL_ANNOTATIONS_MAPPING.
        include_labels_with_artefacts (bool): if True, a text consisting of a valid text for a label followed by the
            additional-artefact marker is accepted for that label, and the artefact label (if present in
            `class_labels`) is considered last.
            If False, only the exact texts of LABEL_ANNOTATIONS_MAPPING are accepted for a label, and a text
            containing the additional-artefact marker is assigned the artefact label when that label is reached.
            Defaults to True.

    Returns:
        label (str): the label corresponding to text (one of the specified labels or NL (no_label)).

    Raises:
        TypeError: if `class_labels` is not a list.
        ValueError: if a label that is checked is not a key of LABEL_ANNOTATIONS_MAPPING.
    """
    if not isinstance(class_labels, list):
        raise TypeError('Argument `class_labels` must be a list. Got type {}.'.format(type(class_labels)))

    artefact_label = SLEEP_LABELS['artefact']

    # Work on a copy, so that reordering below does not alter the list of the caller.
    ordered_labels = list(class_labels)
    if include_labels_with_artefacts and artefact_label in ordered_labels:
        # Put artefact label at the end of the list, so it is considered last.
        ordered_labels.remove(artefact_label)
        ordered_labels.append(artefact_label)

    # Default label (when no suitable label found): no_label.
    label = SLEEP_LABELS['no_label']

    # Find the first label in ordered_labels for which the text classifies.
    for lab in ordered_labels:
        # Check label input.
        if lab not in LABEL_ANNOTATIONS_MAPPING:
            raise ValueError('Invalid label "{}". Choose from {}.'.format(lab, list(LABEL_ANNOTATIONS_MAPPING.keys())))

        valid_texts = list(LABEL_ANNOTATIONS_MAPPING[lab])
        if include_labels_with_artefacts:
            # Also accept every valid text followed by the additional-artefact marker.
            valid_texts.extend([valid_text + STANDARD_ANNOTATIONS['additional_artefact']
                                for valid_text in valid_texts])
        if text in valid_texts:
            label = lab
            break

        # Artefacts receive special treatment: standard text with additional artefact belong to artefact label (but only
        # after checking all other labels accepting additional artefacts if include_labels_with_artefacts is True,
        # therefore artefact label was put at the end of the list, and only now we check for additional artefact).
        if lab == artefact_label and STANDARD_ANNOTATIONS['additional_artefact'] in text:
            label = lab
            break

    return label


def clean_up_text(text):
    """
    Remove redundant text and whitespace from a raw annotation text.

    A leading 'Note : ' is removed first. After that, leading and trailing * characters and white spaces are
    removed, in whatever order they are interleaved (e.g. ' * QS * ' becomes 'QS'). Characters in the middle of
    the text are left untouched.

    Args:
        text (str): annotation text.

    Returns:
        clean_text (str): cleaned up annotation text.
    """
    note_prefix = 'Note : '
    clean_text = text

    # Remove 'Note : ' at the beginning of the text.
    if clean_text.startswith(note_prefix):
        clean_text = clean_text[len(note_prefix):]

    # Remove trailing and leading * characters and white spaces. One pass of each is not enough when they
    # alternate (white space before a *), so repeat until the text does not change anymore.
    previous_text = None
    while clean_text != previous_text:
        previous_text = clean_text
        clean_text = clean_text.strip('*').strip()

    return clean_text


def standardize_annotation(text):
    """
    Convert an annotation text to a standard annotation (one of STANDARD_ANNOTATIONS).

    The text is tested against a fixed, ordered sequence of predicates and the first predicate that matches
    determines the standard annotation. The order matters, since one text may satisfy several predicates.
    Texts matching the start/stop predicates are converted to 'START' and 'STOP' respectively.

    If the resulting standard annotation is not itself artefact, blinking or movement, but the text does match
    one of those three predicates, STANDARD_ANNOTATIONS['additional_artefact'] is appended to the result.

    Args:
        text (str): annotation text to convert.

    Returns:
        std_text (str): standard annotation corresponding to the annotation text.

    Raises:
        NotImplementedError: if the annotation text cannot be automatically recognized.
    """
    # Ordered (predicate, key in STANDARD_ANNOTATIONS) pairs: the first matching predicate wins.
    ordered_checks = (
        (is_undetermined, 'undetermined'),
        (is_indeterminate_sleep, 'indeterminate_sleep'),
        (is_transitional_sleep, 'transitional_sleep'),
        (is_dubious, 'dubious'),
        (is_non_quiet_sleep, 'non_quiet_sleep'),
        (is_quiet_sleep_hvs_ta, 'quiet_sleep_hvs_ta'),
        (is_quiet_sleep_ta, 'quiet_sleep_ta'),
        (is_quiet_sleep_hvs, 'quiet_sleep_hvs'),
        (is_quiet_sleep, 'quiet_sleep'),
        (is_active_sleep_1, 'active_sleep_1'),
        (is_active_sleep_2, 'active_sleep_2'),
        (is_active_sleep, 'active_sleep'),
        (is_wake, 'wake'),
        (is_artefact, 'artefact'),
        (is_blinking, 'blinking'),
        (is_movement, 'movement'),
        (is_no_label, 'no_label'),
    )

    for predicate, key in ordered_checks:
        if predicate(text):
            std_text = STANDARD_ANNOTATIONS[key]
            break
    else:
        # None of the standard annotations matched: check for the start/stop markers of the scoring.
        if is_start(text):
            std_text = 'START'
        elif is_stop(text):
            std_text = 'STOP'
        else:
            raise NotImplementedError('Cannot automatically standardize annotation with text: "{}".'.format(text))

    # If artefact is present additionally, add additional artefact to text.
    if std_text not in [
        STANDARD_ANNOTATIONS['artefact'],
        STANDARD_ANNOTATIONS['blinking'],
        STANDARD_ANNOTATIONS['movement']
    ]:
        if is_artefact(text) or is_blinking(text) or is_movement(text):
            std_text += STANDARD_ANNOTATIONS['additional_artefact']

    return std_text


def word_in_text(word, text):
    """
    Check whether `word` is in `text` as a single word (so not just a part of another word).

    An occurrence of `word` counts as a single word if the character directly before it (if any) and the
    character directly after it (if any) are not alphabetic (according to str.isalpha). Digits, white space and
    punctuation are therefore valid neighbours.

    Case sensitive.

    Args:
        word (str): word to look for in text.
        text (str): the text to search.

    Returns:
        found (bool): True if word is in text, False if not.
    """
    word_length = len(word)
    text_length = len(text)

    # Search the original text with a moving start index (instead of cutting the text), so that the character
    # preceding a later occurrence is still available for the check.
    search_from = 0
    while True:
        idx = text.find(word, search_from)
        if idx == -1:
            # No (more) occurrences of word.
            return False

        # Check whether this occurrence is not just a part of another word.
        end = idx + word_length
        valid_previous_char = idx == 0 or not text[idx - 1].isalpha()
        valid_next_char = end == text_length or not text[end].isalpha()
        if valid_previous_char and valid_next_char:
            return True

        # Continue one position further: this always advances, so the loop terminates (also for an empty word).
        search_from = idx + 1


def pattern_or_word_in_text(patterns, words, text):
    """
    Check whether any pattern or word is in `text`.

    Patterns are not case sensitive and ignore any space characters and can be part of a (longer) word.
    Words are case sensitive and they cannot be part of a (longer) word.

    Patterns are checked first; the words are only checked if none of the patterns is found.

    Args:
        patterns (list): list of patterns to look for.
        words (list): list of words to look for in text.
        text (str): the text to search.

    Returns:
        (bool): True if any pattern or word is in text, False if not.

    Raises:
        TypeError: if patterns or words is not a list.
    """
    if not isinstance(patterns, list):
        raise TypeError('Argument `patterns` must be a list. Got type(patterns)={}.'.format(type(patterns)))
    if not isinstance(words, list):
        raise TypeError('Argument `words` must be a list. Got type(words)={}.'.format(type(words)))

    if patterns:
        # Convert to lower case and remove spaces once (not once per pattern), then find patterns.
        normalized_text = text.lower().replace(' ', '')
        if any(p.lower().replace(' ', '') in normalized_text for p in patterns):
            return True

    # Find words.
    return any(word_in_text(w, text) for w in words)


# To standardize the annotations, we have to look for patterns and words in the annotation text.
# Patterns are not case sensitive and ignore any white spaces and can be part of a (longer) word.
# Words are case sensitive and they cannot be part of a (longer) word.
def is_non_quiet_sleep(text):
    """
    Check whether an annotation text refers to non-quiet sleep.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'nqs' or the word SLEEP_LABELS['non_quiet_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=['nqs'],
        words=[SLEEP_LABELS['non_quiet_sleep']],
        text=text,
    )


def is_quiet_sleep(text):
    """
    Check whether an annotation text refers to quiet sleep.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'qs' or the word SLEEP_LABELS['quiet_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=['qs'],
        words=[SLEEP_LABELS['quiet_sleep']],
        text=text,
    )


def is_quiet_sleep_ta(text):
    """
    Check whether an annotation text refers to quiet sleep TA.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if one of the patterns 'qs ta', 'qs hvs/ta', 'qs hvs+ta' or one of the words
            SLEEP_LABELS['quiet_sleep_ta'], 'TA' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['qs ta', 'qs hvs/ta', 'qs hvs+ta'],
        words=[SLEEP_LABELS['quiet_sleep_ta'], 'TA'],
        text=text,
    )


def is_quiet_sleep_hvs(text):
    """
    Check whether an annotation text refers to quiet sleep HVS.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'qs hvs' or one of the words SLEEP_LABELS['quiet_sleep_hvs'], 'HVS'
            is in text.
    """
    return pattern_or_word_in_text(
        patterns=['qs hvs'],
        words=[SLEEP_LABELS['quiet_sleep_hvs'], 'HVS'],
        text=text,
    )


def is_quiet_sleep_hvs_ta(text):
    """
    Check whether an annotation text mentions HVS and TA together.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if one of the patterns 'ta/hvs', 'ta+hvs', 'hvs/ta', 'hvs+ta' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['ta/hvs', 'ta+hvs', 'hvs/ta', 'hvs+ta'],
        words=[],
        text=text,
    )


def is_active_sleep(text):
    """
    Check whether an annotation text refers to active sleep.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if one of the words 'AS', SLEEP_LABELS['active_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=[],
        words=['AS', SLEEP_LABELS['active_sleep']],
        text=text,
    )


def is_active_sleep_1(text):
    """
    Check whether an annotation text refers to active sleep 1.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'as 1' or one of the words SLEEP_LABELS['active_sleep_1'], 'ASI'
            is in text.
    """
    return pattern_or_word_in_text(
        patterns=['as 1'],
        words=[SLEEP_LABELS['active_sleep_1'], 'ASI'],
        text=text,
    )


def is_active_sleep_2(text):
    """
    Check whether an annotation text refers to active sleep 2.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'as 2' or one of the words SLEEP_LABELS['active_sleep_2'], 'ASII', 'LVI'
            is in text.
    """
    return pattern_or_word_in_text(
        patterns=['as 2'],
        words=[SLEEP_LABELS['active_sleep_2'], 'ASII', 'LVI'],
        text=text,
    )


def is_wake(text):
    """
    Check whether an annotation text refers to wake.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'wake' or the word SLEEP_LABELS['wake'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=['wake'],
        words=[SLEEP_LABELS['wake']],
        text=text,
    )


def is_artefact(text):
    """
    Check whether an annotation text refers to an artefact.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if one of the patterns 'artefact', 'artifact' or the word SLEEP_LABELS['artefact']
            is in text.
    """
    return pattern_or_word_in_text(
        patterns=['artefact', 'artifact'],
        words=[SLEEP_LABELS['artefact']],
        text=text,
    )


def is_blinking(text):
    """
    Check whether an annotation text mentions blinking.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'blinking' is in text.
    """
    return pattern_or_word_in_text(patterns=['blinking'], words=[], text=text)


def is_movement(text):
    """
    Check whether an annotation text mentions movement.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'movement' is in text.
    """
    return pattern_or_word_in_text(patterns=['movement'], words=[], text=text)


def is_no_label(text):
    """
    Check whether an annotation text states that there is no label.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if one of the words 'None', 'none', 'Unlabeled' is in text.
    """
    return pattern_or_word_in_text(
        patterns=[],
        words=['None', 'none', 'Unlabeled'],
        text=text,
    )


def is_dubious(text):
    """
    Check whether an annotation text is marked as dubious.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if one of the patterns '?', 'dubious' is in text.
    """
    return pattern_or_word_in_text(patterns=['?', 'dubious'], words=[], text=text)


def is_undetermined(text):
    """
    Check whether an annotation text is marked as undetermined.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if one of the patterns 'undetermined', 'undertermined', 'uncertain' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['undetermined', 'undertermined', 'uncertain'],
        words=[],
        text=text,
    )


def is_indeterminate_sleep(text):
    """
    Check whether an annotation text refers to indeterminate sleep.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'indeterminate' or the word SLEEP_LABELS['indeterminate_sleep']
            is in text.
    """
    return pattern_or_word_in_text(
        patterns=['indeterminate'],
        words=[SLEEP_LABELS['indeterminate_sleep']],
        text=text,
    )


def is_transitional_sleep(text):
    """
    Check whether an annotation text refers to transitional sleep.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if one of the patterns 'intermediate', 'transitional' or the word
            SLEEP_LABELS['transitional_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=['intermediate', 'transitional'],
        words=[SLEEP_LABELS['transitional_sleep']],
        text=text,
    )


def is_start(text):
    """
    Check whether an annotation text marks the start of the sleep scoring.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'start sleep scoring' is in text.
    """
    return pattern_or_word_in_text(patterns=['start sleep scoring'], words=[], text=text)


def is_stop(text):
    """
    Check whether an annotation text marks the stop of the scoring.

    Args:
        text (str): annotation text to search.

    Returns:
        (bool): True if the pattern 'stop scoring' is in text.
    """
    return pattern_or_word_in_text(patterns=['stop scoring'], words=[], text=text)