"""
Code related to handling annotations containing sleep stages.
"""
from nnsa.annotations.config import STANDARD_ANNOTATIONS, LABEL_ANNOTATIONS_MAPPING, SLEEP_LABELS
# Public API of this module: the names exported by a star-import.
# The helper predicates (is_*), word_in_text and pattern_or_word_in_text are not listed here.
__all__ = [
'annotation_to_label',
'clean_up_text',
'standardize_annotation',
]
def annotation_to_label(text, class_labels, include_labels_with_artefacts=True):
    """
    Convert a standard annotation text to a sleep stage label.

    To control the type of classes for classification of text, see the class_labels argument.

    LABEL_ANNOTATIONS_MAPPING maps standard annotation texts to a standard label. This function finds the (standard)
    sleep label in `class_labels` which matches the standard annotation in `text`.
    If the text matches with multiple labels, the first label it matches with is returned.
    Texts that can not be labeled as any of the specified labels are labeled as no_label.

    The list passed as `class_labels` is not modified: the reordering of the artefact label is done on a copy.

    Args:
        text (str): standard annotation text to convert to a label, see STANDARD_ANNOTATIONS.
        class_labels (list): a list with labels for the classification of text, see SLEEP_LABELS and
            LABEL_ANNOTATIONS_MAPPING.
        include_labels_with_artefacts (bool): if True, texts with a sleep label and indicating an artefact are included.
            If False, any text indicating an artefact is excluded.
            Defaults to True.

    Returns:
        label (str): the label corresponding to text (one of the specified labels or NL (no_label)).

    Raises:
        TypeError: if class_labels is not a list.
        ValueError: if a label in class_labels is not a key of LABEL_ANNOTATIONS_MAPPING.
    """
    if not isinstance(class_labels, list):
        # Report the type of the argument (not its value).
        raise TypeError('Argument `class_labels` must be a list. Got type {}.'.format(type(class_labels)))

    artefact_label = SLEEP_LABELS['artefact']

    # Work on a copy, so that the list of the caller is never reordered.
    labels = list(class_labels)
    if include_labels_with_artefacts and artefact_label in labels:
        # Put artefact label at the end of the list, so it is considered last.
        labels.remove(artefact_label)
        labels.append(artefact_label)

    # Default label (when no suitable label found): no_label.
    no_label = SLEEP_LABELS['no_label']

    # Suffix that marks a standard annotation as additionally containing an artefact.
    additional_artefact = STANDARD_ANNOTATIONS['additional_artefact']

    # Find the first label in labels for which the text classifies.
    for lab in labels:
        # Check label input.
        if lab not in LABEL_ANNOTATIONS_MAPPING:
            raise ValueError('Invalid label "{}". Choose from {}.'.format(lab, list(LABEL_ANNOTATIONS_MAPPING.keys())))

        valid_texts = list(LABEL_ANNOTATIONS_MAPPING[lab])
        if include_labels_with_artefacts:
            # Also accept every valid text with the additional artefact suffix.
            valid_texts.extend([valid_text + additional_artefact for valid_text in valid_texts])

        if text in valid_texts:
            return lab

        # Artefacts receive special treatment: standard text with additional artefact belong to artefact label (but
        # only after checking all other labels accepting additional artefacts if include_labels_with_artefacts is
        # True, therefore artefact label was put at the end of the list, and only now we check for additional
        # artefact).
        if lab == artefact_label and additional_artefact in text:
            return lab

    return no_label
def clean_up_text(text):
    """
    Remove redundant text and whitespace from a raw annotation text.

    The prefix 'Note : ' is dropped if the text starts with it. After that, leading and trailing white space and
    leading and trailing * characters are removed. White space is stripped both before and after stripping the
    * characters, so that the * characters are also removed when they are themselves surrounded by white space
    (e.g. ' *QS* ' becomes 'QS').

    Args:
        text (str): annotation text.

    Returns:
        clean_text (str): cleaned up annotation text.
    """
    clean_text = text

    # Remove 'Note : ' at the beginning of the text.
    prefix = 'Note : '
    if clean_text.startswith(prefix):
        clean_text = clean_text[len(prefix):]

    # Strip outer white space first (otherwise it shields the * characters from being stripped), then the
    # * characters, then the white space that was in between the * characters and the actual text.
    clean_text = clean_text.strip().strip('*').strip()

    return clean_text
def standardize_annotation(text):
    """
    Convert an annotation text to a standard annotation (one of STANDARD_ANNOTATIONS).

    The text is tested against a fixed sequence of checks and the first check that succeeds determines the
    standard annotation. If the text additionally matches an artefact, blinking or movement check (and the
    standard annotation is not itself one of those three), STANDARD_ANNOTATIONS['additional_artefact'] is appended.

    Args:
        text (str): annotation text to convert.

    Returns:
        std_text (str): standard annotation corresponding to the annotation text.

    Raises:
        NotImplementedError: if the annotation text cannot be automatically recognized.
    """
    # Ordered (check, key in STANDARD_ANNOTATIONS) pairs. The order is significant, since a text may pass several
    # checks (e.g. the pattern 'qs' is also contained in 'nqs', so non quiet sleep is tested before quiet sleep).
    keyed_rules = (
        (is_undetermined, 'undetermined'),
        (is_indeterminate_sleep, 'indeterminate_sleep'),
        (is_transitional_sleep, 'transitional_sleep'),
        (is_dubious, 'dubious'),
        (is_non_quiet_sleep, 'non_quiet_sleep'),
        (is_quiet_sleep_hvs_ta, 'quiet_sleep_hvs_ta'),
        (is_quiet_sleep_ta, 'quiet_sleep_ta'),
        (is_quiet_sleep_hvs, 'quiet_sleep_hvs'),
        (is_quiet_sleep, 'quiet_sleep'),
        (is_active_sleep_1, 'active_sleep_1'),
        (is_active_sleep_2, 'active_sleep_2'),
        (is_active_sleep, 'active_sleep'),
        (is_wake, 'wake'),
        (is_artefact, 'artefact'),
        (is_blinking, 'blinking'),
        (is_movement, 'movement'),
        (is_no_label, 'no_label'),
    )

    # Ordered (check, fixed text) pairs, tried only when none of the keyed rules applies. These are returned as
    # fixed strings instead of being looked up in STANDARD_ANNOTATIONS.
    fixed_rules = (
        (is_start, 'START'),
        (is_stop, 'STOP'),
    )

    for check, key in keyed_rules:
        if check(text):
            std_text = STANDARD_ANNOTATIONS[key]
            break
    else:
        for check, fixed_text in fixed_rules:
            if check(text):
                std_text = fixed_text
                break
        else:
            raise NotImplementedError('Cannot automatically standardize annotation with text: "{}".'.format(text))

    # Annotations that are an artefact by themselves never get the additional artefact suffix.
    artefact_only_texts = [
        STANDARD_ANNOTATIONS['artefact'],
        STANDARD_ANNOTATIONS['blinking'],
        STANDARD_ANNOTATIONS['movement']
    ]
    if std_text in artefact_only_texts:
        return std_text

    # If artefact is present additionally, add additional artefact to text.
    if is_artefact(text) or is_blinking(text) or is_movement(text):
        std_text += STANDARD_ANNOTATIONS['additional_artefact']

    return std_text
def word_in_text(word, text):
    """
    Check whether `word` is in `text` as a single word (so not just a part of another word).

    An occurrence of `word` counts as a single word if the character directly before it and the character directly
    after it (if any) are not alphabetic (as determined by str.isalpha). Digits, white space and punctuation
    therefore all delimit a word.

    Case sensitive.

    Args:
        word (str): word to look for in text.
        text (str): the text to search.

    Returns:
        found (bool): True if word is in text, False if not.
    """
    word_length = len(word)
    text_length = len(text)

    # Search on the original (unsliced) text, so that the character before each occurrence is always available.
    search_from = 0
    while True:
        idx = text.find(word, search_from)
        if idx == -1:
            # No (more) occurrences.
            return False

        end = idx + word_length

        # Check whether `word` match is not just a part of another word.
        preceded_by_letter = idx > 0 and text[idx - 1].isalpha()
        followed_by_letter = end < text_length and text[end].isalpha()
        if not (preceded_by_letter or followed_by_letter):
            return True

        # Continue one character further (this always advances, also for an empty `word`).
        search_from = idx + 1
def pattern_or_word_in_text(patterns, words, text):
    """
    Check whether at least one of the given patterns or words occurs in `text`.

    Patterns are compared after converting both the pattern and the text to lower case and removing all space
    characters (' '), so they are not case sensitive, ignore spaces and can be part of a (longer) word.
    Words are looked up with word_in_text: they are case sensitive and cannot be part of a (longer) word.

    Args:
        patterns (list): list of patterns to look for.
        words (list): list of words to look for in text.
        text (str): the text to search.

    Returns:
        (bool): True if any pattern or word is in text, False if not.

    Raises:
        TypeError: if patterns or words is not a list.
    """
    if not isinstance(patterns, list):
        raise TypeError('Arument `patterns` must be a list. Got type(patterns)={}.'.format(type(patterns)))

    if not isinstance(words, list):
        raise TypeError('Arument `words` must be a list. Got type(words)={}.'.format(type(words)))

    def _squash(string):
        # Lower case without space characters, so that the comparison ignores case and spaces.
        return string.lower().replace(' ', '')

    # One boolean per pattern and one per word.
    pattern_hits = [_squash(pattern) in _squash(text) for pattern in patterns]
    word_hits = [word_in_text(word, text) for word in words]

    return any(pattern_hits + word_hits)
# To standardize the annotations, we have to look for patterns and words in the annotation text.
# Patterns are not case sensitive and ignore any white spaces and can be part of a (longer) word.
# Words are case sensitive and they cannot be part of a (longer) word.
def is_non_quiet_sleep(text):
    """
    Check whether `text` matches the non quiet sleep annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'nqs' or the word SLEEP_LABELS['non_quiet_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=['nqs'],
        words=[SLEEP_LABELS['non_quiet_sleep']],
        text=text,
    )
def is_quiet_sleep(text):
    """
    Check whether `text` matches the quiet sleep annotation.

    Note that the pattern 'qs' is also contained in e.g. 'nqs'.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'qs' or the word SLEEP_LABELS['quiet_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=['qs'],
        words=[SLEEP_LABELS['quiet_sleep']],
        text=text,
    )
def is_quiet_sleep_ta(text):
    """
    Check whether `text` matches the quiet sleep TA annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if one of the patterns 'qs ta', 'qs hvs/ta', 'qs hvs+ta' or one of the words
            SLEEP_LABELS['quiet_sleep_ta'], 'TA' is in text.
    """
    return pattern_or_word_in_text(
        patterns=[
            'qs ta',
            'qs hvs/ta',
            'qs hvs+ta',
        ],
        words=[
            SLEEP_LABELS['quiet_sleep_ta'],
            'TA',
        ],
        text=text,
    )
def is_quiet_sleep_hvs(text):
    """
    Check whether `text` matches the quiet sleep HVS annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'qs hvs' or one of the words SLEEP_LABELS['quiet_sleep_hvs'], 'HVS' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['qs hvs'],
        words=[
            SLEEP_LABELS['quiet_sleep_hvs'],
            'HVS',
        ],
        text=text,
    )
def is_quiet_sleep_hvs_ta(text):
    """
    Check whether `text` matches the combined quiet sleep HVS/TA annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if one of the patterns 'ta/hvs', 'ta+hvs', 'hvs/ta', 'hvs+ta' is in text.
    """
    return pattern_or_word_in_text(
        patterns=[
            'ta/hvs',
            'ta+hvs',
            'hvs/ta',
            'hvs+ta',
        ],
        words=[],
        text=text,
    )
def is_active_sleep(text):
    """
    Check whether `text` matches the active sleep annotation.

    Only whole, case sensitive words are considered (no patterns).

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if one of the words 'AS', SLEEP_LABELS['active_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=[],
        words=[
            'AS',
            SLEEP_LABELS['active_sleep'],
        ],
        text=text,
    )
def is_active_sleep_1(text):
    """
    Check whether `text` matches the active sleep 1 annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'as 1' or one of the words SLEEP_LABELS['active_sleep_1'], 'ASI' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['as 1'],
        words=[
            SLEEP_LABELS['active_sleep_1'],
            'ASI',
        ],
        text=text,
    )
def is_active_sleep_2(text):
    """
    Check whether `text` matches the active sleep 2 annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'as 2' or one of the words SLEEP_LABELS['active_sleep_2'], 'ASII', 'LVI' is in
            text.
    """
    return pattern_or_word_in_text(
        patterns=['as 2'],
        words=[
            SLEEP_LABELS['active_sleep_2'],
            'ASII',
            'LVI',
        ],
        text=text,
    )
def is_wake(text):
    """
    Check whether `text` matches the wake annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'wake' or the word SLEEP_LABELS['wake'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=['wake'],
        words=[SLEEP_LABELS['wake']],
        text=text,
    )
def is_artefact(text):
    """
    Check whether `text` matches the artefact annotation (both spellings 'artefact' and 'artifact').

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if one of the patterns 'artefact', 'artifact' or the word SLEEP_LABELS['artefact'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=[
            'artefact',
            'artifact',
        ],
        words=[SLEEP_LABELS['artefact']],
        text=text,
    )
def is_blinking(text):
    """
    Check whether `text` matches the blinking annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'blinking' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['blinking'],
        words=[],
        text=text,
    )
def is_movement(text):
    """
    Check whether `text` matches the movement annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'movement' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['movement'],
        words=[],
        text=text,
    )
def is_no_label(text):
    """
    Check whether `text` matches the no label annotation.

    Only whole, case sensitive words are considered (no patterns).

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if one of the words 'None', 'none', 'Unlabeled' is in text.
    """
    return pattern_or_word_in_text(
        patterns=[],
        words=[
            'None',
            'none',
            'Unlabeled',
        ],
        text=text,
    )
def is_dubious(text):
    """
    Check whether `text` matches the dubious annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if one of the patterns '?', 'dubious' is in text.
    """
    return pattern_or_word_in_text(
        patterns=[
            '?',
            'dubious',
        ],
        words=[],
        text=text,
    )
def is_undetermined(text):
    """
    Check whether `text` matches the undetermined annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if one of the patterns 'undetermined', 'undertermined', 'uncertain' is in text.
    """
    return pattern_or_word_in_text(
        patterns=[
            'undetermined',
            'undertermined',
            'uncertain',
        ],
        words=[],
        text=text,
    )
def is_indeterminate_sleep(text):
    """
    Check whether `text` matches the indeterminate sleep annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'indeterminate' or the word SLEEP_LABELS['indeterminate_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=['indeterminate'],
        words=[SLEEP_LABELS['indeterminate_sleep']],
        text=text,
    )
def is_transitional_sleep(text):
    """
    Check whether `text` matches the transitional sleep annotation.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if one of the patterns 'intermediate', 'transitional' or the word
            SLEEP_LABELS['transitional_sleep'] is in text.
    """
    return pattern_or_word_in_text(
        patterns=[
            'intermediate',
            'transitional',
        ],
        words=[SLEEP_LABELS['transitional_sleep']],
        text=text,
    )
def is_start(text):
    """
    Check whether `text` marks the start of the sleep scoring.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'start sleep scoring' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['start sleep scoring'],
        words=[],
        text=text,
    )
def is_stop(text):
    """
    Check whether `text` marks the stop of the scoring.

    Args:
        text (str): annotation text to check.

    Returns:
        (bool): True if the pattern 'stop scoring' is in text.
    """
    return pattern_or_word_in_text(
        patterns=['stop scoring'],
        words=[],
        text=text,
    )