Source code for nnsa.utils.normalization

"""
Contains functions for normalizing data.
"""

import numpy as np

# Names exported by `from nnsa.utils.normalization import *`.
__all__ = [
    'align',
    'normalize_eeg_channels'
]

# Small constant added to denominators to avoid division by zero.
EPS = 1e-7


def _scaled_copy(signal, scale):
    """
    Return a floating point copy of signal, optionally divided by its maximum absolute value.

    Args:
        signal (array-like): input signal. It is never modified.
        scale (bool): if True, divide the copy by its maximum absolute value (skipped for empty or all-zero signals).

    Returns:
        out (np.ndarray): new array with an inexact (floating point or complex) dtype.
    """
    # np.array always copies, so the in-place operations below cannot leak into the caller's data.
    out = np.array(signal)

    # Integer and boolean arrays cannot be divided in place, so promote them to float first.
    if not np.issubdtype(out.dtype, np.inexact):
        out = out.astype(float)

    if scale and out.size:
        peak = np.max(np.abs(out))
        # Dividing an all-zero signal by its (zero) maximum would turn it into NaNs.
        if peak != 0:
            out /= peak

    return out


def align(a, b, scale=True, flip_sign=True):
    """
    Normalize the signals a and b by division by their maximum absolute value and flip the sign of b if needed.

    The inputs are left untouched: the function always works on (floating point) copies.

    Args:
        a (np.ndarray): 1D array.
        b (np.ndarray): 1D array (must have the same length as a if flip_sign is True).
        scale (bool, optional): if True, scales each signal by dividing by its maximum absolute value. Empty signals
            and signals that are zero everywhere are left unscaled.
        flip_sign (bool, optional): if True, flips the sign of b if it results in a better match between a and b,
            i.e. if the dot product of a and b is negative.

    Returns:
        a_out (np.ndarray): transformed a.
        b_out (np.ndarray): transformed b.
    """
    # Normalize for easier visual comparison.
    a_out = _scaled_copy(a, scale)
    b_out = _scaled_copy(b, scale)

    # dot(a, -b) > dot(a, b) is the same as dot(a, b) < 0, so one dot product suffices.
    if flip_sign and np.dot(a_out, b_out) < 0:
        b_out = -b_out

    return a_out, b_out

def normalize_eeg_channels(x, mean=None, std=None):
    """
    Normalize the EEG data per channel.

    Each channel is normalized as (x - mean) / (std + EPS), where EPS is a small constant that prevents division by
    zero for channels with zero standard deviation.

    Args:
        x (np.ndarray): EEG data with shape (batch_size, num_samples, num_channels), where num_samples are the number of
            samples in one EEG signal, i.e. it corresponds to the time dimension of the EEG data.
        mean (np.ndarray, optional): array of shape (num_channels) with a value for each channel used to normalize the
            data. If None, the mean of the EEG data in x will be computed per channel (ignoring NaNs).
        std (np.ndarray, optional): array of shape (num_channels) with a value for each channel used to normalize the
            data. If None, the std of the EEG data in x will be computed per channel (ignoring NaNs).

    Returns:
        x_normalized (np.ndarray): float64 array with same shape as x containing normalized values.
        mean (np.ndarray): array of shape (num_channels) with a value for each channel used to normalize the data.
        std (np.ndarray): array of shape (num_channels) with a value for each channel used to normalize the data.

    Raises:
        ValueError: if x has fewer than three dimensions, or if mean or std does not hold exactly one value per
            channel.
    """
    x = np.asarray(x)

    # The channels live on the third axis. Unpacking raises a ValueError if x has fewer than three dimensions.
    _, _, num_channels = x.shape[:3]

    # Compute whichever statistic was not given. mean and std are handled independently, so passing only one of
    # them works as well.
    if mean is None or std is None:
        data_per_channel = x.reshape(-1, num_channels)
        if mean is None:
            mean = np.nanmean(data_per_channel, axis=0)
        if std is None:
            std = np.nanstd(data_per_channel, axis=0)

    # Reshape the per-channel statistics so that they broadcast along the channel axis of x.
    stat_shape = (1, 1, num_channels) + (1,) * (x.ndim - 3)
    mean_b = np.reshape(mean, stat_shape)
    std_b = np.reshape(std, stat_shape)

    # Normalize x. The result is always returned as float64, regardless of the dtype of x.
    x_normalized = ((x - mean_b) / (std_b + EPS)).astype(np.float64, copy=False)

    return x_normalized, mean, std