"""
This module contains Keras implementations of custom loss functions.
"""
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
def kl_loss(z_mean, z_log_var, beta=1):
    """
    Kullback–Leibler divergence regulariser for variational autoencoders.

    Evaluates the Gaussian KL term (eq. 10 of the reference) element-wise,
    sums it over the last axis (the latent variables), multiplies the result
    by ``beta`` and finally averages over everything that is left.

    References:
        D. P. Kingma and M. Welling, “Auto-Encoding Variational Bayes.” 2014.
        https://arxiv.org/pdf/1312.6114.pdf

    Implementation:
        https://keras.io/examples/generative/vae/#define-the-vae-as-a-model-with-a-custom-trainstep

    Args:
        z_mean: layer with z_mean.
        z_log_var: layer with logarithm of the variance.
        beta: weight parameter of the KL-loss. Can be a float or a tensorflow variable.

    Returns:
        Scalar tensor holding the weighted, batch-averaged KL loss.

    Examples:
        autoencoder.add_loss(kl_loss(z_mean, z_log_var, beta=1))
    """
    # Element-wise Gaussian KL term (eq. 10).
    squared_mean = tf.square(z_mean)
    variance = tf.exp(z_log_var)
    per_latent = -0.5 * (1 + z_log_var - squared_mean - variance)

    # Collapse the latent axis, then weight each sample's total.
    per_sample = tf.reduce_sum(per_latent, axis=-1)
    weighted = per_sample * beta

    # Average over the samples in the batch.
    return tf.reduce_mean(weighted)
def mean_var_loss(y_true, y_pred):
    """
    Mean-variance loss (Eq. 5, 8).

    The loss is ``0.5 * exp(-log_var) * (y_true - mean)**2 + 0.5 * log_var``,
    averaged over the last axis.

    References:
        A. Kendall and Y. Gal,
        “What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?,”
        2017-03-15.

    Args:
        y_true: the true values for y. Must broadcast against the mean slice,
            which keeps a trailing axis of size 1 (e.g. shape (N, 1) for a
            (N, 2) prediction). Note that a (N,) target would broadcast
            against (N, 1) to (N, N).
        y_pred: the first axis of the last dimension should contain the mean/point estimate.
            The second axis of the last dimension corresponds to the log variance (see Eq. 8).

    Returns:
        Tensor with the loss averaged over the last axis.
    """
    # Index with an ellipsis so that the mean / log-variance are always taken
    # from the LAST dimension, as documented, whatever the rank of y_pred.
    # For 2D predictions this is identical to y_pred[:, 0:1] / y_pred[:, 1:2].
    mean = y_pred[..., 0:1]
    log_var = y_pred[..., 1:2]

    # Squared error scaled by the predicted precision exp(-log_var).
    squared_difference = 0.5 * tf.exp(-log_var) * tf.square(y_true - mean)
    # Penalty that keeps the network from predicting arbitrarily large variance.
    var_penalty = 0.5 * log_var

    total_loss = tf.reduce_mean(squared_difference + var_penalty, axis=-1)
    return total_loss
def myCategoricalCrossentropy(weights=1.0, axis=-1, pseudolabel_weight=None):
    """
    Weighted categorical cross-entropy that tolerates batches with unlabeled
    examples (where the one-hot encoding is all zeros).

    Unlabeled examples contribute zero to the labeled loss because every entry
    of their ``y_true`` is zero. Optionally, a pseudo-label loss (using the
    rounded predictions as targets) is added for the unlabeled examples.

    NOTE(review): the original description states that the mean is rescaled to
    ignore unlabeled examples, but the code takes a plain ``K.mean`` over all
    entries -- confirm which behavior is intended.

    Adopted from https://gist.github.com/wassname/ce364fddfc8a025bfab4348cf5de852d.

    Args:
        weights (tuple): weight for each class, e.g. for 2 class problem (1.0, 10.0).
            Multiplied with the per-class loss using normal broadcasting rules.
        axis (int): axis corresponding to the number of classes.
        pseudolabel_weight (float, optional): if not None, enables the
            pseudo-label loss on unlabeled examples and scales it by this factor.

    Returns:
        loss_fn (function): function taking ``y_true`` and ``y_pred`` that
        returns the scalar loss.

    Examples:
        >>> model.compile(loss=myCategoricalCrossentropy())
    """
    def loss_fn(y_true, y_pred):
        # clip to prevent NaN's and Inf's
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # scale predictions so that the class probas of each sample sum to 1
        y_pred /= K.sum(y_pred, axis=axis, keepdims=True)
        # clip to prevent NaN's and Inf's
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        # calc
        loss = y_true * K.log(y_pred)  # Note that loss for y_true==0 is 0.
        # Apply weights to classes.
        loss = loss * weights
        if pseudolabel_weight is not None:
            # Variable that indicates whether the sample is labeled or not.
            # Reduce over the configured class axis (previously hard-coded to
            # -1, which disagreed with the `axis` used for normalisation).
            labeled = tf.reduce_sum(y_true, axis=axis)  # 1 if labeled, 0 if unlabeled.
            unlabeled = (1 - labeled)  # 1 if unlabeled, 0 if labeled.
            # Pseudoloss.
            # FIXME: Only works properly for 2 class (but might do well on
            # multiple classes, by only using high confidence (> 0.5) predictions).
            y_true_pseudo = tf.round(y_pred)
            loss_pseudo = y_true_pseudo * K.log(y_pred)
            loss_pseudo = loss_pseudo * weights
            # Add labeled and pseudo losses.
            loss_labeled = labeled * tf.reduce_mean(loss, axis=axis)
            # Set labeled pseudoloss to zero.
            loss_unlabeled = unlabeled * tf.reduce_mean(loss_pseudo, axis=axis)
            loss = loss_labeled + pseudolabel_weight * loss_unlabeled
        # Negative mean over all remaining entries.
        loss = -K.mean(loss)
        return loss
    return loss_fn
def SADLoss(c, eta=1.0):
    """
    Semi-supervised anomaly detection loss (Ruff et al. 2019).

    References:
        L. Ruff et al.,
        “Deep Semi-Supervised Anomaly Detection.”
        arXiv, 2019, doi: 10.48550/ARXIV.1906.02694.

    Args:
        c (list or np.ndarray): hypersphere center (n_latent_dims,).
        eta (float): controls the weight of the labeled loss. Set >1 to have more weight on
            labeled loss, set <1 to have less weight on labeled loss (compared to unlabeled).

    Returns:
        loss_fn (function): function taking ``y_true`` and ``y_pred`` that
        returns the per-sample loss.

    Examples:
        >>> model.compile(loss=SADLoss(c=[4.0, 3.0]))
    """
    def loss_fn(y_true, y_pred):
        """
        Anomaly loss for mixed labeled and unlabeled data.

        Args:
            y_true: ND-tensor with true labels (n_samples, ...), where -1 refers to anomaly,
                0 to unlabeled and 1 to clean. Integer labels are accepted; they are cast
                to the dtype of ``y_pred``.
            y_pred: N+1D tensor with predicted encodings (n_samples, ..., n_latent_dims).

        Returns:
            loss: 1D tensor (n_samples,) with loss as described in the paper by Ruff et al. 2019.
        """
        # Bring the labels to the prediction dtype: the labels are used both as
        # a multiplicative mask and as an exponent of the float distance, which
        # fails with a dtype mismatch for integer labels.
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)

        # Variable that indicates whether the sample is labeled or not.
        labeled = tf.pow(y_true, 2)  # 1 if labeled, 0 if unlabeled.
        unlabeled = (1 - labeled)  # 1 if unlabeled, 0 if labeled.

        # Squared distance to hypersphere center. Add epsilon to prevent Inf's
        # when raising to power -1.
        s = tf.reduce_sum(tf.square(y_pred - c), axis=-1) + K.epsilon()

        # Unlabeled + labeled loss: s for unlabeled samples, s**(+1) for clean
        # samples and s**(-1) for anomalies (weighted by eta).
        loss = s * unlabeled + eta * tf.pow(s, y_true) * labeled

        # Average over remaining dimensions (until we are left with a 1D array).
        while len(loss.shape) > 1:
            loss = tf.reduce_mean(loss, axis=-1)
        return loss
    return loss_fn
def quantile_loss(qs):
    """
    Return a quantile (pinball) loss function.

    References:
        Koenker, R., Hallock, K.F.: Quantile regression.
        Journal of Economic Perspectives 15(4), 143-156 (2001). DOI 10.1257/jep.15.4.143
        https://www.kaggle.com/ulrich07/quantile-regression-with-keras/notebook
        https://www.evergreeninnovations.co/blog-quantile-loss-function-for-machine-learning/

    Args:
        qs (list): target quantiles (values between 0 and 1).

    Returns:
        loss (function): function that takes in y_true and y_pred, computing the pinball loss.
    """
    # To tensorflow array of constants with shape (1, len(qs)), so that it
    # broadcasts against predictions of shape (N, len(qs)).
    qs = tf.constant(np.array([qs]), dtype=tf.float32)

    # Define loss function for keras.
    def loss(y_true, y_pred):
        """
        Args:
            y_true: the true values for y. Shape (N, ) or (N, 1).
            y_pred: model predictions with shape (N, len(qs)), where the axes in the last dimension
                correspond to the target quantiles. I.e., y_pred[:, i] --> qs[i].

        Returns:
            Tensor of shape (N,) with the pinball loss averaged over the quantiles.
        """
        y_pred = tf.convert_to_tensor(y_pred)
        # Match dtypes so integer targets or non-float32 predictions do not
        # raise a dtype mismatch in the arithmetic below.
        y_true = tf.cast(y_true, y_pred.dtype)
        quantiles = tf.cast(qs, y_pred.dtype)

        # A (N,) target cannot broadcast against (N, len(qs)) (or silently
        # mis-broadcasts when N == len(qs)); give it a trailing axis instead.
        if y_true.shape.rank == 1:
            y_true = tf.expand_dims(y_true, axis=-1)

        e = y_true - y_pred
        # Pinball loss: q * e for under-prediction, (q - 1) * e for over-prediction.
        v = tf.maximum(quantiles * e, (quantiles - 1) * e)
        return tf.reduce_mean(v, axis=-1)
    return loss