Source code for nnsa.keras.losses

"""
This module contains Keras implementations of custom loss functions.
"""
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K


def kl_loss(z_mean, z_log_var, beta=1):
    """
    Weighted Kullback-Leibler regularisation term for variational autoencoders.

    Evaluates the closed-form KL term for a diagonal Gaussian parameterised by
    `z_mean` and `z_log_var` (Kingma & Welling, eq. 10), sums it over the last
    axis, multiplies by `beta` and averages what remains into a single scalar.

    References:
        D. P. Kingma and M. Welling, "Auto-Encoding Variational Bayes." 2014.
        https://arxiv.org/pdf/1312.6114.pdf
        Implementation:
        https://keras.io/examples/generative/vae/#define-the-vae-as-a-model-with-a-custom-trainstep

    Args:
        z_mean: layer output holding the latent means.
        z_log_var: layer output holding the logarithm of the latent variances.
        beta: multiplier applied to the KL term. Can be a float or a
            tensorflow variable.

    Returns:
        Scalar tensor: the beta-weighted KL term averaged over all remaining
        axes (the batch, for 2-D inputs).

    Examples:
        autoencoder.add_loss(kl_loss(z_mean, z_log_var, beta=1))
    """
    # Element-wise KL contribution of every latent variable (eq. 10).
    per_latent = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))

    # Collapse the latent axis, then apply the weight.
    per_sample = tf.reduce_sum(per_latent, axis=-1)
    per_sample = per_sample * beta

    # Average over the samples in the batch.
    return tf.reduce_mean(per_sample)
def mean_var_loss(y_true, y_pred):
    """
    Mean-variance (heteroscedastic regression) loss, Eq. 5 and 8 of the reference.

    The network predicts both a point estimate and the log of its variance;
    the squared error is attenuated by the predicted variance, while the log
    variance itself is penalised.

    References:
        A. Kendall and Y. Gal, "What Uncertainties Do We Need in Bayesian Deep
        Learning for Computer Vision?," 2017-03-15.

    Args:
        y_true: the true values for y. Must broadcast against the sliced mean
            column (the code performs no reshaping).
        y_pred: predictions whose column 0 along axis 1 is the mean/point
            estimate and whose column 1 along axis 1 is the log variance
            (see Eq. 8). Note that the slicing is done on axis 1, which is
            the last dimension only for 2-D predictions.

    Returns:
        Tensor with the loss averaged over the last axis.
    """
    # Keep the sliced axis (0:1 / 1:2 rather than 0 / 1) so both parts stay
    # column-shaped.
    predicted_mean = y_pred[:, 0:1]
    predicted_log_var = y_pred[:, 1:2]

    # 0.5 * exp(-s) * (y - mu)^2 : squared error scaled by the inverse variance.
    attenuated_error = 0.5*tf.exp(-predicted_log_var) * tf.square(y_true - predicted_mean)
    # 0.5 * s : discourages the trivial solution of predicting infinite variance.
    log_var_term = 0.5*predicted_log_var

    return tf.reduce_mean(attenuated_error + log_var_term, axis=-1)
def myCategoricalCrossentropy(weights=1.0, axis=-1, pseudolabel_weight=None):
    """
    Build a class-weighted categorical cross-entropy that tolerates unlabeled rows.

    Unlabeled examples are encoded with an all-zero one-hot target, so their
    `y_true * log(y_pred)` terms are exactly zero. Optionally, such rows can
    instead be trained against pseudo-labels obtained by rounding the
    predictions.

    Adopted from https://gist.github.com/wassname/ce364fddfc8a025bfab4348cf5de852d.

    NOTE(review): the final reduction is a plain mean over all entries; no
    rescaling by the number of labeled examples is visible in this function.

    Args:
        weights (tuple): weight for each class, e.g. for 2 class problem
            (1.0, 10.0). Multiplied element-wise with the per-class terms.
        axis (int): axis corresponding to the number of classes. Used when
            renormalising the predictions; NOTE(review): the pseudo-label
            branch always reduces over the last axis (-1) regardless.
        pseudolabel_weight (float, optional): if not None, rows whose target
            sums to zero get a pseudo-label loss scaled by this factor.
            If None (default), no pseudo-labeling is done.

    Returns:
        loss_fn (function): takes `y_true` and `y_pred` and returns a scalar
        loss tensor.

    Examples:
        >>> model.compile(loss=myCategoricalCrossentropy())
    """
    def loss_fn(y_true, y_pred):
        eps = K.epsilon()

        # Clip, renormalise so class probabilities sum to 1 per sample, and clip
        # again: keeps log() away from NaN's and Inf's.
        probs = K.clip(y_pred, eps, 1 - eps)
        probs = probs / K.sum(probs, axis=axis, keepdims=True)
        probs = K.clip(probs, eps, 1 - eps)
        log_probs = K.log(probs)

        # Class-weighted log-likelihood; entries where y_true == 0 are 0.
        labeled_ll = y_true * log_probs
        labeled_ll = labeled_ll * weights

        if pseudolabel_weight is None:
            return -K.mean(labeled_ll)

        # Per-sample indicator: 1 if the row carries a label, 0 otherwise
        # (and the complement for unlabeled rows).
        is_labeled = tf.reduce_sum(y_true, axis=-1)
        is_unlabeled = (1 - is_labeled)

        # Pseudo targets from the rounded predictions.
        # FIXME: Only works properly for 2 class (but might do well on multiple
        # classes, by only using high confidence (> 0.5) predictions).
        pseudo_targets = tf.round(probs)
        pseudo_ll = pseudo_targets * log_probs
        pseudo_ll = pseudo_ll * weights

        # Per-sample means, masked so that labeled rows only see the true-label
        # term and unlabeled rows only see the pseudo-label term.
        labeled_part = is_labeled * tf.reduce_mean(labeled_ll, axis=-1)
        unlabeled_part = is_unlabeled * tf.reduce_mean(pseudo_ll, axis=-1)
        combined = labeled_part + pseudolabel_weight*unlabeled_part

        return -K.mean(combined)

    return loss_fn
def SADLoss(c, eta=1.0):
    """
    Build the semi-supervised anomaly detection (Deep SAD) loss.

    References:
        L. Ruff et al., "Deep Semi-Supervised Anomaly Detection." arXiv, 2019,
        doi: 10.48550/ARXIV.1906.02694.

    Args:
        c (list or np.ndarray): hypersphere center (n_latent_dims,).
        eta (float): controls the weight of the labeled loss. Set >1 to put
            more weight on the labeled loss, set <1 to put less weight on it
            (compared to the unlabeled loss).

    Returns:
        loss_fn (function): takes `y_true` and `y_pred` and returns the
        per-sample loss.

    Examples:
        >>> model.compile(loss=SADLoss(c=[4.0, 3.0]))
    """
    def loss_fn(y_true, y_pred):
        """
        Anomaly loss for mixed labeled and unlabeled data.

        Args:
            y_true: ND-tensor with true labels (n_samples, ...), where -1 marks
                an anomaly, 0 an unlabeled sample and 1 a clean sample.
            y_pred: N+1D tensor with predicted encodings
                (n_samples, ..., n_latent_dims).

        Returns:
            loss: 1D tensor (n_samples,) with the loss as described in the
                paper by Ruff et al. 2019.
        """
        # Label masks: y_true ** 2 is 1 for labels -1/+1 and 0 for label 0.
        is_labeled = tf.pow(y_true, 2)
        is_unlabeled = (1 - is_labeled)

        # Squared distance to the hypersphere center. The epsilon keeps the
        # value away from zero, so raising it to the power -1 cannot give Inf.
        sq_dist = tf.reduce_sum(tf.square(y_pred - c), axis=-1) + K.epsilon()

        # Unlabeled samples: plain squared distance.
        unlabeled_term = sq_dist * is_unlabeled
        # Labeled samples: distance raised to the label, i.e. d for clean (+1)
        # and 1/d for anomalous (-1) samples, scaled by eta.
        labeled_term = eta * tf.pow(sq_dist, y_true) * is_labeled
        per_element = unlabeled_term + labeled_term

        # Average away trailing axes one at a time until only the sample axis
        # is left.
        for _ in range(len(per_element.shape) - 1):
            per_element = tf.reduce_mean(per_element, axis=-1)

        return per_element

    return loss_fn
def quantile_loss(qs):
    """
    Build a quantile (pinball) loss for one or more target quantiles.

    References:
        Koenker, R., Hallock, K.F.: Quantile regression. Journal of Economic
        Perspectives 15(4), 143-156 (2001). DOI 10.1257/jep.15.4.143
        https://www.kaggle.com/ulrich07/quantile-regression-with-keras/notebook
        https://www.evergreeninnovations.co/blog-quantile-loss-function-for-machine-learning/

    Args:
        qs (list): target quantiles (values between 0 and 1).

    Returns:
        loss (function): function that takes in y_true and y_pred, computing
        the pinball loss.
    """
    # Freeze the quantiles as a float32 constant with a leading singleton axis,
    # so they broadcast across the batch axis of the predictions.
    quantiles = tf.constant(np.array([qs]), dtype=tf.float32)

    # Loss function with the (y_true, y_pred) signature expected by keras.
    def loss(y_true, y_pred):
        """
        Pinball loss averaged over the quantile axis.

        Args:
            y_true: the true values for y, e.g. with shape (N, 1).
                NOTE(review): no reshaping is done here, so a bare (N,)
                tensor must already broadcast correctly against y_pred.
            y_pred: model predictions with shape (N, len(qs)), where the axes
                in the last dimension correspond to the target quantiles,
                i.e. y_pred[:, i] --> qs[i].

        Returns:
            Tensor with the pinball loss averaged over the last axis.
        """
        residual = y_true - y_pred
        # max(q * e, (q - 1) * e): under-prediction is weighted by q,
        # over-prediction by (1 - q).
        pinball = tf.maximum(quantiles * residual, (quantiles - 1) * residual)
        return tf.reduce_mean(pinball, axis=-1)

    return loss