3.2 Loss Functions and Custom Loss Functions
tf.keras.losses.BinaryCrossentropy(
from_logits=False, label_smoothing=0, reduction=losses_utils.ReductionV2.AUTO,
name='binary_crossentropy'
)
The role of the from_logits argument: if the model's output has not been passed through an activation function (i.e. it is a raw logit), set from_logits=True; conversely, if the output has already been passed through a sigmoid activation, keep from_logits=False (the default).
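As a quick sanity check on the two settings, here is a minimal sketch using the standard tf.keras API (the tensor values are illustrative):

import tensorflow as tf

labels = tf.constant([[0.0], [1.0], [1.0]])
logits = tf.constant([[-1.0], [2.0], [0.5]])  # raw scores, no sigmoid applied

# Case 1: the model outputs raw logits -> from_logits=True.
bce_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)
print(bce_logits(labels, logits).numpy())

# Case 2: the model already applied a sigmoid -> from_logits=False (default).
probs = tf.sigmoid(logits)
bce_probs = tf.keras.losses.BinaryCrossentropy(from_logits=False)
print(bce_probs(labels, probs).numpy())  # agrees with case 1 up to epsilon clipping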
What is the principle behind this? Open the TensorFlow source and everything becomes clear: when the `if not from_logits:` branch is taken, bce (binary_crossentropy) is computed directly from the probabilities; otherwise the call is forwarded to nn.sigmoid_cross_entropy_with_logits. (One refinement is visible below: even with from_logits=False, if Keras can see that the output tensor came straight out of a Sigmoid op, it unwraps that op and takes the logits path anyway, for numerical stability.)
def binary_crossentropy(target, output, from_logits=False):
  """Binary crossentropy between an output tensor and a target tensor.

  Arguments:
      target: A tensor with the same shape as `output`.
      output: A tensor.
      from_logits: Whether `output` is expected to be a logits tensor.
          By default, we consider that `output`
          encodes a probability distribution.

  Returns:
      A tensor.
  """
  if not from_logits:
    if (isinstance(output, (ops.EagerTensor, variables_module.Variable)) or
        output.op.type != 'Sigmoid'):
      epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
      output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)

      # Compute cross entropy from probabilities.
      bce = target * math_ops.log(output + epsilon())
      bce += (1 - target) * math_ops.log(1 - output + epsilon())
      return -bce
    else:
      # When sigmoid activation function is used for output operation, we
      # use logits from the sigmoid function directly to compute loss in order
      # to prevent collapsing zero when training.
      assert len(output.op.inputs) == 1
      output = output.op.inputs[0]
  return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
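To make the probability branch above concrete, here is a hypothetical standalone re-implementation using only public TensorFlow ops (the name bce_from_probs and the eps value are our own; the backend additionally adds epsilon() inside the logs, which the clipping here already makes unnecessary for positivity):

import tensorflow as tf

def bce_from_probs(target, output, eps=1e-7):
    # Mirror of the probability branch: clip to (eps, 1 - eps), then apply
    # -[t * log(p) + (1 - t) * log(1 - p)] elementwise.
    output = tf.clip_by_value(output, eps, 1.0 - eps)
    return -(target * tf.math.log(output) +
             (1.0 - target) * tf.math.log(1.0 - output))

t = tf.constant([[1.0], [0.0]])
p = tf.constant([[0.9], [0.2]])
print(bce_from_probs(t, p).numpy())  # ~[[0.105], [0.223]], i.e. -log(0.9), -log(0.8)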
Next, let's take a closer look at nn.sigmoid_cross_entropy_with_logits. The derivation, quoted from its docstring, goes as follows. For brevity, let `x = logits`, `z = labels`. The logistic loss is

      z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
    = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
    = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
    = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
    = (1 - z) * x + log(1 + exp(-x))
    = x - x * z + log(1 + exp(-x))

For x < 0, to avoid overflow in exp(-x), we reformulate the above

      x - x * z + log(1 + exp(-x))
    = log(exp(x)) - x * z + log(1 + exp(-x))
    = - x * z + log(1 + exp(x))

Hence, to ensure stability and avoid overflow, the implementation uses the equivalent formulation

    max(x, 0) - x * z + log(1 + exp(-abs(x)))
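Why the reformulation matters is easy to see numerically. A small standalone check in plain NumPy (the values are chosen for illustration):

import numpy as np

x = np.float32(-100.0)  # a large-magnitude negative logit
z = np.float32(1.0)     # its label

# Naive form: exp(-x) = exp(100) overflows float32 to inf.
naive = x - x * z + np.log1p(np.exp(-x))

# Stable combined form from the docstring.
stable = max(x, 0.0) - x * z + np.log1p(np.exp(-abs(x)))

print(naive, stable)  # inf vs. 100.0 (the correct loss)

With that in mind, here is the full source of the function: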
def sigmoid_cross_entropy_with_logits(  # pylint: disable=invalid-name
    _sentinel=None,
    labels=None,
    logits=None,
    name=None):
  """Computes sigmoid cross entropy given `logits`.

  Measures the probability error in discrete classification tasks in which each
  class is independent and not mutually exclusive. For instance, one could
  perform multilabel classification where a picture can contain both an elephant
  and a dog at the same time.

  For brevity, let `x = logits`, `z = labels`. The logistic loss is

        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))

  For x < 0, to avoid overflow in exp(-x), we reformulate the above

        x - x * z + log(1 + exp(-x))
      = log(exp(x)) - x * z + log(1 + exp(-x))
      = - x * z + log(1 + exp(x))

  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation

      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  `logits` and `labels` must have the same type and shape.

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: A `Tensor` of the same type and shape as `logits`.
    logits: A `Tensor` of type `float32` or `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    logistic losses.

  Raises:
    ValueError: If `logits` and `labels` do not have the same shape.
  """
  # pylint: disable=protected-access
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           labels, logits)
  # pylint: enable=protected-access

  with ops.name_scope(name, "logistic_loss", [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and labels must have the same shape (%s vs %s)" %
                       (logits.get_shape(), labels.get_shape()))

    # The logistic loss formula from above is
    #   x - x * z + log(1 + exp(-x))
    # For x < 0, a more numerically stable formula is
    #   -x * z + log(1 + exp(x))
    # Note that these two expressions can be combined into the following:
    #   max(x, 0) - x * z + log(1 + exp(-abs(x)))
    # To allow computing gradients at zero, we define custom versions of max and
    # abs functions.
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)
    neg_abs_logits = array_ops.where(cond, -logits, logits)
    return math_ops.add(
        relu_logits - logits * labels,
        math_ops.log1p(math_ops.exp(neg_abs_logits)),
        name=name)
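Finally, a short sketch checking the combined formula against the public tf.nn op (the values are illustrative):

import tensorflow as tf

x = tf.constant([-100.0, -1.0, 0.0, 1.0, 100.0])  # logits
z = tf.constant([   1.0,  0.0, 1.0, 1.0,   0.0])  # labels

builtin = tf.nn.sigmoid_cross_entropy_with_logits(labels=z, logits=x)
manual = tf.maximum(x, 0.0) - x * z + tf.math.log1p(tf.exp(-tf.abs(x)))

print(builtin.numpy())
print(manual.numpy())  # matches the built-in op elementwise

The only difference inside TensorFlow is that max and abs are expressed with array_ops.where, so that, as the source comment notes, the gradient at x = 0 remains well defined.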