Source code for nnsa.keras.losses

"""
This module contains Keras implementations of custom loss functions.
"""
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K


def kl_loss(z_mean, z_log_var, beta=1):
    """
    Kullback–Leibler divergence term for training variational autoencoders
    (Tensorflow implementation).

    The divergence is computed per latent dimension, summed over the last
    axis, scaled by `beta` and finally averaged over all remaining entries,
    yielding a scalar.

    References:
        D. P. Kingma and M. Welling, “Auto-Encoding Variational Bayes.” 2014. https://arxiv.org/pdf/1312.6114.pdf

        Implementation:
        https://keras.io/examples/generative/vae/#define-the-vae-as-a-model-with-a-custom-trainstep

    Args:
        z_mean: tensor (e.g. layer output) with the latent means.
        z_log_var: tensor (e.g. layer output) with the logarithm of the latent variances.
        beta: weight parameter of the KL-loss. Can be a float or a tensorflow variable.

    Returns:
        Scalar tensor with the weighted KL loss.

    Examples:
        autoencoder.add_loss(kl_loss(z_mean, z_log_var, beta=1))
    """
    # Closed-form KL divergence for a Gaussian, per latent dimension (eq. 10).
    per_dimension = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))

    # Collapse the latent axis so that each sample has a single KL value.
    per_sample = tf.reduce_sum(per_dimension, axis=-1)

    # Weight the per-sample values, then average them over the batch.
    weighted = per_sample * beta
    return tf.reduce_mean(weighted)


def mean_var_loss(y_true, y_pred):
    """
    Mean-variance loss (Eq. 5, 8).

    Computes `0.5 * exp(-log_var) * (y_true - mean)**2 + 0.5 * log_var`,
    where `mean` and `log_var` are read from the last dimension of `y_pred`,
    and averages the result over the last axis.

    References:
        A. Kendall and Y. Gal,
        “What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?,”
         2017-03-15.

    Args:
        y_true: the true values for y. Must broadcast against the `(..., 1)`
            slices taken from `y_pred` (presumably shape (..., 1) — confirm
            against the caller).
        y_pred: index 0 of the last dimension should contain the mean/point estimate.
            Index 1 of the last dimension corresponds to the log variance (see Eq. 8).
            May have any number of leading dimensions.

    Returns:
        Tensor with the loss averaged over the last axis.
    """
    # Slice along the LAST dimension (ellipsis) so that predictions with more
    # than two dimensions are handled as documented. For 2D predictions this
    # is identical to y_pred[:, 0:1] and y_pred[:, 1:2].
    mean = y_pred[..., 0:1]
    log_var = y_pred[..., 1:2]

    # Squared error, attenuated by the predicted precision exp(-log_var).
    squared_difference = 0.5 * tf.exp(-log_var) * tf.square(y_true - mean)

    # Penalty that keeps the model from predicting an infinite variance.
    var_penalty = 0.5 * log_var

    total_loss = tf.reduce_mean(squared_difference + var_penalty, axis=-1)
    return total_loss


def myCategoricalCrossentropy(weights=1.0, axis=-1, pseudolabel_weight=None):
    """
    Weighted categorical cross-entropy that tolerates batches with unlabeled examples
    (where the one-hot encoding is all zeros).

    Unlabeled examples contribute zero to the supervised term, because the
    term is `y_true * log(y_pred)`. Optionally, unlabeled examples receive a
    pseudo-label loss, where the rounded prediction serves as the target.

    NOTE(review): the final reduction is a plain mean over all entries, so
    unlabeled examples still count in the denominator. An earlier description
    of this function claimed the mean is rescaled to ignore them — confirm
    which behaviour is intended.

    Adopted from https://gist.github.com/wassname/ce364fddfc8a025bfab4348cf5de852d.

    Args:
         weights (float or tuple): weight for each class, e.g. for 2 class problem (1.0, 10.0).
            Multiplied elementwise with the loss, so it must broadcast against
            `y_true` (a tuple lines up with the last dimension).
         axis (int): axis corresponding to the number of classes.
         pseudolabel_weight (float, optional): if not None, weight of the pseudo-label
            loss that is applied to unlabeled examples. If None (default), unlabeled
            examples do not add any loss.

    Returns:
        loss_fn (function): function that takes in y_true and y_pred and returns a scalar loss.

    Examples:
        >>> model.compile(loss=myCategoricalCrossentropy())
    """

    def loss_fn(y_true, y_pred):
        # Clip to prevent NaN's and Inf's.
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # Scale predictions so that the class probas of each sample sum to 1.
        y_pred /= K.sum(y_pred, axis=axis, keepdims=True)
        # Clip again: the rescaling may have pushed values out of range.
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())

        # Log-likelihood of the true class. Note that loss for y_true==0 is 0.
        loss = y_true * K.log(y_pred)

        # Apply weights to classes.
        loss = loss * weights

        if pseudolabel_weight is not None:
            # Indicator per sample, reduced over the class axis (the same axis
            # that is used for the normalisation above, not a hard-coded -1).
            labeled = tf.reduce_sum(y_true, axis=axis)  # 1 if labeled, 0 if unlabeled.
            unlabeled = (1 - labeled)  # 1 if unlabeled, 0 if labeled.

            # Pseudoloss, with the rounded prediction as target.
            # FIXME: Only works properly for 2 class (but might do well on multiple classes,
            #  by only using high confidence (> 0.5) predictions).
            y_true_pseudo = tf.round(y_pred)
            loss_pseudo = y_true_pseudo * K.log(y_pred)
            loss_pseudo = loss_pseudo * weights

            # Per-sample means over the class axis, masked so that each sample
            # contributes either its labeled loss or its pseudoloss.
            loss_labeled = labeled * tf.reduce_mean(loss, axis=axis)
            loss_unlabeled = unlabeled * tf.reduce_mean(loss_pseudo, axis=axis)
            loss = loss_labeled + pseudolabel_weight * loss_unlabeled

        # Negate the mean log-likelihood to obtain a loss to minimise.
        loss = -K.mean(loss)

        return loss

    return loss_fn


def SADLoss(c, eta=1.0):
    """
    Semi-supervised anomaly detection loss (Ruff et al. 2019).

    References:
        L. Ruff et al.,
        “Deep Semi-Supervised Anomaly Detection.”
        arXiv, 2019, doi: 10.48550/ARXIV.1906.02694.

    Args:
         c (list or np.ndarray): hypersphere center (n_latent_dims,).
         eta (float): controls the weight of the labeled loss. Set >1 to have more weight on
            labeled loss, set <1 to have less weight on labeled loss (compared to unlabeled).

    Returns:
        loss_fn (function): function that takes in y_true and y_pred and returns the
            loss per sample.

    Examples:
        >>> model.compile(loss=SADLoss(c=[4.0, 3.0]))
    """
    def loss_fn(y_true, y_pred):
        """
        Anomaly loss for mixed labeled and unlabeled data.

        Args:
            y_true: ND-tensor with true labels (n_samples, ...), where -1 refers to anomaly,
                0 to unlabeled and 1 to clean. A trailing axis of size 1 (same rank
                as y_pred) is accepted and removed. Integer labels are cast
                to the dtype of y_pred.
            y_pred: N+1D tensor with predicted encodings (n_samples, ..., n_latent_dims).

        Returns:
            loss: 1D tensor (n_samples,) with loss as described in the paper by Ruff et al. 2019.
        """
        # tf.pow requires base and exponent to share a dtype, so make sure
        # that (possibly integer) labels match the encodings.
        y_true = tf.cast(y_true, y_pred.dtype)

        # Labels that arrive as (n_samples, ..., 1) have the same rank as the
        # encodings. Left as is, they would broadcast against the distances
        # (n_samples, ...) and mix up samples, so drop the singleton axis.
        true_rank = y_true.shape.rank
        if true_rank and true_rank == len(y_pred.shape) and y_true.shape[-1] == 1:
            y_true = tf.squeeze(y_true, axis=-1)

        # Variable that indicates whether the sample is labeled or not.
        labeled = tf.pow(y_true, 2)  # 1 if labeled, 0 if unlabeled.
        unlabeled = (1 - labeled)  # 1 if unlabeled, 0 if labeled.

        # Squared distance to hypersphere center. Add epsilon to prevent Inf's when raising to power -1.
        s = tf.reduce_sum(tf.square(y_pred - c), axis=-1) + K.epsilon()

        # Unlabeled + labeled loss: s for unlabeled, s for clean (y_true = 1)
        # and 1/s for anomalies (y_true = -1).
        loss = s * unlabeled + eta * tf.pow(s, y_true) * labeled

        # Average over remaining dimensions (until we are left with a 1D array).
        while len(loss.shape) > 1:
            loss = tf.reduce_mean(loss, axis=-1)

        return loss

    return loss_fn


def quantile_loss(qs):
    """
    Return a quantile (pinball) loss function.

    References:
        Koenker, R., Hallock, K.F.: Quantile regression.
        Journal of Economic Perspectives 15(4), 143-156 (2001). DOI 10.1257/jep.15.4.143

        https://www.kaggle.com/ulrich07/quantile-regression-with-keras/notebook
        https://www.evergreeninnovations.co/blog-quantile-loss-function-for-machine-learning/

    Args:
        qs (list): target quantiles (values between 0 and 1).

    Returns:
        loss (function): function that takes in y_true and y_pred, computing the pinball loss.
    """
    # To tensorflow array of constants with shape (1, len(qs)), such that it
    # broadcasts over the samples.
    qs = tf.constant(np.array([qs]), dtype=tf.float32)

    # Define loss function for keras.
    def loss(y_true, y_pred):
        """
        Args:
            y_true: the true values for y. Shape (N, ) or (N, 1).
            y_pred: model predictions with shape (N, len(qs)), where the axes in the last dimension
                correspond to the target quantiles. I.e., y_pred[:, i] --> qs[i].

        Returns:
            Tensor with shape (N, ) holding the pinball loss averaged over the quantiles.
        """
        # The quantiles are float32 constants; cast the inputs to the same
        # dtype, since tensorflow does not mix dtypes in arithmetic.
        y_true = tf.cast(y_true, qs.dtype)
        y_pred = tf.cast(y_pred, qs.dtype)

        # Targets with shape (N, ) need a trailing axis. Otherwise they would
        # broadcast along the quantile axis instead of the sample axis.
        true_rank = y_true.shape.rank
        pred_rank = y_pred.shape.rank
        if true_rank is not None and pred_rank is not None and pred_rank - true_rank == 1:
            y_true = tf.expand_dims(y_true, axis=-1)

        # Pinball loss: q * e for under-prediction (e > 0) and (q - 1) * e
        # for over-prediction (e < 0).
        e = y_true - y_pred
        v = tf.maximum(qs * e, (qs - 1) * e)
        return tf.reduce_mean(v, axis=-1)

    return loss