损失函数 #

损失函数概述 #

损失函数(Loss Function)用于衡量模型预测值与真实值之间的差距,是模型训练优化的核心。

损失函数分类 #

text
┌─────────────────────────────────────────────────────────────┐
│                    损失函数分类                              │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  回归损失                                                    │
│  ├── MSE (均方误差)                                         │
│  ├── MAE (平均绝对误差)                                     │
│  ├── Huber Loss                                             │
│  └── Log-Cosh Loss                                          │
│                                                             │
│  分类损失                                                    │
│  ├── Binary Crossentropy                                    │
│  ├── Categorical Crossentropy                               │
│  ├── Sparse Categorical Crossentropy                        │
│  └── Hinge Loss                                             │
│                                                             │
│  其他损失                                                    │
│  ├── Contrastive Loss                                       │
│  ├── Triplet Loss                                           │
│  └── Focal Loss                                             │
│                                                             │
└─────────────────────────────────────────────────────────────┘

回归损失 #

均方误差 (MSE) #

python
# Mean Squared Error (MSE): average of squared differences between
# predictions and targets; penalizes large errors quadratically.
import tensorflow as tf
import numpy as np  # NOTE(review): imported but unused in this snippet

y_true = tf.constant([1.0, 2.0, 3.0, 4.0])
y_pred = tf.constant([1.1, 1.9, 3.2, 3.8])

# Using the Keras API
mse = tf.keras.losses.MeanSquaredError()
loss = mse(y_true, y_pred)
print(f"MSE: {loss.numpy()}")

# Manual computation — should print the same value as the API call
mse_manual = tf.reduce_mean(tf.square(y_true - y_pred))
print(f"MSE (手动): {mse_manual.numpy()}")

# Using it in a model
# NOTE(review): `model` is assumed to be built earlier in the tutorial.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.MeanSquaredError()
)

# Or the string shorthand
model.compile(optimizer='adam', loss='mse')

平均绝对误差 (MAE) #

python
import tensorflow as tf

# Mean Absolute Error (MAE) via the Keras API and computed by hand —
# both should print the same value.
targets = tf.constant([1.0, 2.0, 3.0, 4.0])
predictions = tf.constant([1.1, 1.9, 3.2, 3.8])

api_loss = tf.keras.losses.MeanAbsoluteError()(targets, predictions)
print(f"MAE: {api_loss.numpy()}")

# Manual computation: mean of the absolute errors.
manual_loss = tf.reduce_mean(tf.abs(targets - predictions))
print(f"MAE (手动): {manual_loss.numpy()}")

Huber Loss #

python
import tensorflow as tf

# Huber loss example: the last sample (10.0 vs 5.0) is a deliberate outlier.
targets = tf.constant([1.0, 2.0, 3.0, 10.0])
predictions = tf.constant([1.1, 1.9, 3.2, 5.0])

# Huber is quadratic for errors below `delta` and linear above it,
# so it is more robust to outliers than MSE.
huber_value = tf.keras.losses.Huber(delta=1.0)(targets, predictions)
print(f"Huber Loss: {huber_value.numpy()}")

# MSE on the same data for comparison: the outlier dominates the average.
mse_value = tf.keras.losses.MeanSquaredError()(targets, predictions)
print(f"MSE: {mse_value.numpy()}")

Log-Cosh Loss #

python
import tensorflow as tf

# Log-cosh loss: behaves like MSE for small errors and like MAE for
# large ones, while staying twice differentiable everywhere.
actual = tf.constant([1.0, 2.0, 3.0, 4.0])
predicted = tf.constant([1.1, 1.9, 3.2, 3.8])

result = tf.keras.losses.LogCosh()(actual, predicted)
print(f"Log-Cosh Loss: {result.numpy()}")

分类损失 #

二分类交叉熵 #

python
import tensorflow as tf

# Binary cross-entropy: on probabilities vs. on raw logits.
labels = tf.constant([0, 1, 0, 1])
probabilities = tf.constant([0.1, 0.9, 0.2, 0.8])

# Probabilities (e.g. after a sigmoid output layer).
loss = tf.keras.losses.BinaryCrossentropy()(labels, probabilities)
print(f"Binary Crossentropy: {loss.numpy()}")

# Raw scores: set from_logits=True when the output layer has no activation;
# the sigmoid is then applied internally (numerically more stable).
raw_scores = tf.constant([-2.0, 2.0, -1.0, 1.5])
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(labels, raw_scores)
print(f"Binary CE (from logits): {loss.numpy()}")

# Typical model setup for binary classification.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy')

多分类交叉熵 #

python
import tensorflow as tf

# Categorical cross-entropy expects one-hot encoded labels.
onehot_labels = tf.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
probabilities = tf.constant([[0.9, 0.05, 0.05], [0.1, 0.8, 0.1], [0.1, 0.2, 0.7]])

# Probabilities (softmax output).
loss = tf.keras.losses.CategoricalCrossentropy()(onehot_labels, probabilities)
print(f"Categorical Crossentropy: {loss.numpy()}")

# Raw scores: from_logits=True applies the softmax internally.
raw_scores = tf.constant([[2.0, 0.5, 0.1], [0.5, 1.5, 0.3], [0.2, 0.5, 2.0]])
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)(onehot_labels, raw_scores)
print(f"Categorical CE (from logits): {loss.numpy()}")

稀疏分类交叉熵 #

python
import tensorflow as tf

# Sparse variant: labels are integer class indices instead of one-hot vectors.
class_ids = tf.constant([0, 1, 2])
probabilities = tf.constant([[0.9, 0.05, 0.05], [0.1, 0.8, 0.1], [0.1, 0.2, 0.7]])

loss = tf.keras.losses.SparseCategoricalCrossentropy()(class_ids, probabilities)
print(f"Sparse Categorical CE: {loss.numpy()}")

# Raw scores with from_logits=True.
raw_scores = tf.constant([[2.0, 0.5, 0.1], [0.5, 1.5, 0.3], [0.2, 0.5, 2.0]])
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)(class_ids, raw_scores)
print(f"Sparse CE (from logits): {loss.numpy()}")

# Typical setup: a linear (no-activation) output layer paired with
# from_logits=True for numerical stability.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10),
])
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

Hinge Loss #

python
import tensorflow as tf

# Hinge losses expect labels in {-1, +1} (SVM-style margin losses).
margin_labels = tf.constant([-1, 1, -1, 1])
margin_scores = tf.constant([-0.5, 0.8, -0.3, 0.9])

hinge_value = tf.keras.losses.Hinge()(margin_labels, margin_scores)
print(f"Hinge Loss: {hinge_value.numpy()}")

# Squared hinge penalizes margin violations quadratically.
sq_hinge_value = tf.keras.losses.SquaredHinge()(margin_labels, margin_scores)
print(f"Squared Hinge Loss: {sq_hinge_value.numpy()}")

自定义损失函数 #

函数式定义 #

python
import tensorflow as tf

# Function-style custom loss: any callable taking (y_true, y_pred) and
# returning a scalar tensor can be passed to model.compile().
def custom_mse(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))

# NOTE(review): `model` is assumed to be built earlier in the tutorial.
model.compile(optimizer='adam', loss=custom_mse)

# Parameterized custom loss via a closure: the outer function captures
# `weight`; the inner function has the (y_true, y_pred) signature Keras expects.
def weighted_mse(weight):
    def loss(y_true, y_pred):
        return tf.reduce_mean(weight * tf.square(y_true - y_pred))
    return loss

model.compile(optimizer='adam', loss=weighted_mse(2.0))

类式定义 #

python
import tensorflow as tf

class CustomLoss(tf.keras.losses.Loss):
    """Mean-squared-error implemented as a serializable Loss subclass."""

    def __init__(self, name='custom_loss', **kwargs):
        super().__init__(name=name, **kwargs)

    def call(self, y_true, y_pred):
        squared_error = tf.square(y_true - y_pred)
        return tf.reduce_mean(squared_error)

    def get_config(self):
        # No extra state to serialize; the base config is sufficient.
        return super().get_config()

model.compile(optimizer='adam', loss=CustomLoss())

Focal Loss #

python
import tensorflow as tf

class FocalLoss(tf.keras.losses.Loss):
    """Binary focal loss (Lin et al., 2017): alpha_t * (1 - p_t)^gamma * CE.

    Down-weights well-classified examples so training focuses on hard,
    misclassified ones — useful under heavy class imbalance.

    Args:
        gamma: focusing exponent; gamma=0 reduces to alpha-weighted BCE.
        alpha: weight for the positive class (1 - alpha for the negative).
    """

    def __init__(self, gamma=2.0, alpha=0.25, name='focal_loss'):
        super().__init__(name=name)
        self.gamma = gamma
        self.alpha = alpha

    def call(self, y_true, y_pred):
        # Cast labels to the prediction dtype so tf.equal / arithmetic
        # broadcast cleanly even when labels arrive as ints.
        y_true = tf.cast(y_true, y_pred.dtype)
        # Clip probabilities away from 0/1 so log() stays finite.
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)
        # p_t: predicted probability assigned to the true class.
        pt = tf.where(tf.equal(y_true, 1.0), y_pred, 1 - y_pred)
        alpha_t = tf.where(tf.equal(y_true, 1.0), self.alpha, 1 - self.alpha)
        # Element-wise cross-entropy -log(p_t).
        # BUG FIX: the original multiplied the per-element focal weight by
        # tf.keras.losses.binary_crossentropy, which has ALREADY been
        # averaged over the last axis — the product only "worked" via
        # broadcasting and applied the weights to the mean BCE instead of
        # to each element's own cross-entropy term.
        ce = -tf.math.log(pt)
        focal_weight = alpha_t * tf.pow(1 - pt, self.gamma)
        return tf.reduce_mean(focal_weight * ce)

model.compile(optimizer='adam', loss=FocalLoss(gamma=2.0, alpha=0.25))

Dice Loss #

python
import tensorflow as tf

class DiceLoss(tf.keras.losses.Loss):
    """Soft Dice loss (1 - Dice coefficient), common in segmentation."""

    def __init__(self, smooth=1e-6, name='dice_loss'):
        super().__init__(name=name)
        # Smoothing term keeps the ratio defined when both masks are empty.
        self.smooth = smooth

    def call(self, y_true, y_pred):
        truth = tf.cast(y_true, tf.float32)
        prediction = tf.cast(y_pred, tf.float32)

        overlap = tf.reduce_sum(truth * prediction)
        total = tf.reduce_sum(truth) + tf.reduce_sum(prediction)

        dice_coefficient = (2.0 * overlap + self.smooth) / (total + self.smooth)
        return 1.0 - dice_coefficient

model.compile(optimizer='adam', loss=DiceLoss())

多损失函数 #

多输出模型 #

python
import tensorflow as tf

# Functional-API model with two heads sharing a single trunk.
inputs = tf.keras.Input(shape=(100,))
trunk = tf.keras.layers.Dense(64, activation='relu')(inputs)

# Head 1: 10-way softmax classifier.
class_output = tf.keras.layers.Dense(10, activation='softmax', name='classification')(trunk)

# Head 2: scalar regression.
reg_output = tf.keras.layers.Dense(1, name='regression')(trunk)

model = tf.keras.Model(inputs=inputs, outputs=[class_output, reg_output])

# One loss per named output; loss_weights combine them into a single
# objective: total = 1.0 * classification + 0.5 * regression.
model.compile(
    optimizer='adam',
    loss={
        'classification': 'sparse_categorical_crossentropy',
        'regression': 'mse',
    },
    loss_weights={
        'classification': 1.0,
        'regression': 0.5,
    },
)

损失函数与正则化 #

python
import tensorflow as tf

# L2 weight regularization: the penalty is registered as a layer loss and
# is added to the compiled loss automatically during training.
weight_penalty = tf.keras.regularizers.l2(0.01)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=weight_penalty),
    tf.keras.layers.Dense(10),
])

model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

下一步 #

现在你已经掌握了损失函数,接下来学习 优化器,了解如何选择和配置优化算法!

最后更新:2026-04-04