评估指标 #

指标概述 #

评估指标（Metrics）用于监控模型的训练过程并评估模型性能。与损失函数不同，指标不参与梯度计算，也不会影响参数更新，只用于观察和评估。

指标分类 #

text

┌─────────────────────────────────────────────────────────────┐
│                    评估指标分类                              │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  分类指标                                                    │
│  ├── Accuracy (准确率)                                      │
│  ├── Precision (精确率)                                     │
│  ├── Recall (召回率)                                        │
│  ├── F1-Score                                               │
│  ├── AUC                                                    │
│  └── Confusion Matrix                                       │
│                                                             │
│  回归指标                                                    │
│  ├── MAE (平均绝对误差)                                     │
│  ├── MSE (均方误差)                                         │
│  ├── RMSE (均方根误差)                                      │
│  └── R² (决定系数)                                          │
│                                                             │
│  其他指标                                                    │
│  ├── Mean                                                   │
│  ├── Sum                                                    │
│  └── 自定义指标                                             │
│                                                             │
└─────────────────────────────────────────────────────────────┘

分类指标 #

Accuracy #

python

import tensorflow as tf

# 10-class classifier: 784 input features -> 64 ReLU units -> 10 softmax probabilities.
# The input shape is declared with an explicit Input object; passing `input_shape=`
# to the first layer is deprecated in Keras 3 and triggers a warning there.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# NOTE: the compile() calls below are alternatives. Each call replaces the
# configuration set by the previous one; pick the variant that matches your labels.

# String shortcut: Keras resolves 'accuracy' to a concrete accuracy metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Sparse categorical accuracy (integer class labels)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')]
)

# Categorical accuracy (one-hot labels)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')]
)

# Binary accuracy
# NOTE: shown here only to illustrate the metric API; a binary setup normally
# pairs this loss/metric with a single sigmoid output unit rather than the
# 10-way softmax head defined above.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')]
)

Top-K 准确率 #

python

import tensorflow as tf

# Classifier that outputs raw logits (no softmax on the last layer).
# The input shape is declared with an explicit Input object; passing `input_shape=`
# to the first layer is deprecated in Keras 3 and triggers a warning there.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10)
])

model.compile(
    optimizer='adam',
    # from_logits=True because the final Dense layer has no activation.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[
        # Top-1 accuracy on integer labels.
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        # Counts a prediction as correct when the label is among the 5 highest scores.
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top5_acc')
    ]
)

Precision 和 Recall #

python

import tensorflow as tf

# Binary classifier: 100 input features -> 64 ReLU units -> 1 sigmoid probability.
# The input shape is declared with an explicit Input object; passing `input_shape=`
# to the first layer is deprecated in Keras 3 and triggers a warning there.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(100,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Precision and recall with their default settings.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall')
    ]
)

# Explicit decision threshold: predictions above it count as positive.
# This call replaces the configuration set by the compile() call above.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        tf.keras.metrics.Precision(thresholds=0.5, name='precision'),
        tf.keras.metrics.Recall(thresholds=0.5, name='recall')
    ]
)

AUC #

python

import tensorflow as tf

# Binary classifier: 100 input features -> 64 ReLU units -> 1 sigmoid probability.
# The input shape is declared with an explicit Input object; passing `input_shape=`
# to the first layer is deprecated in Keras 3 and triggers a warning there.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(100,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        # Area under the curve with the default curve type.
        tf.keras.metrics.AUC(name='auc'),
        # Area under the precision-recall curve.
        tf.keras.metrics.AUC(curve='PR', name='auc_pr')
    ]
)

F1 Score (自定义) #

python

import tensorflow as tf

class F1Score(tf.keras.metrics.Metric):
    """Streaming F1 score composed from Keras ``Precision`` and ``Recall``.

    Both sub-metrics are created with their default settings and accumulate
    their own state; the F1 value is derived from their current results each
    time ``result()`` is called.

    Args:
        name: Name under which the metric is reported.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, name='f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.precision = tf.keras.metrics.Precision()
        self.recall = tf.keras.metrics.Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Feed one batch of labels and predictions to both sub-metrics."""
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        """Return the harmonic mean of the accumulated precision and recall."""
        p = self.precision.result()
        r = self.recall.result()
        # The small epsilon keeps the division finite when p + r == 0.
        return 2 * p * r / (p + r + 1e-7)

    def reset_state(self):
        """Clear the accumulated state of both sub-metrics.

        ``reset_state`` is the hook name used by current Keras versions;
        ``reset_states`` is the deprecated spelling.
        """
        self.precision.reset_state()
        self.recall.reset_state()

    def reset_states(self):
        """Deprecated alias of ``reset_state``, kept for existing callers."""
        self.reset_state()

# Report the custom F1 metric while training with binary cross-entropy.
# `model` is not created in this snippet; it is expected to exist already.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[F1Score()])

回归指标 #

MAE 和 MSE #

python

import tensorflow as tf

# Regression model: 10 input features -> 64 ReLU units -> 1 linear output.
# The input shape is declared with an explicit Input object; passing `input_shape=`
# to the first layer is deprecated in Keras 3 and triggers a warning there.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(10,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(
    optimizer='adam',
    loss='mse',
    metrics=[
        tf.keras.metrics.MeanAbsoluteError(name='mae'),
        tf.keras.metrics.MeanSquaredError(name='mse'),
        tf.keras.metrics.RootMeanSquaredError(name='rmse')
    ]
)

R² Score #

python

import tensorflow as tf

class R2Score(tf.keras.metrics.Metric):
    """Streaming coefficient of determination (R^2) over all seen elements.

    All elements of ``y_true`` / ``y_pred`` are pooled into one population
    (the tensors are flattened), and R^2 is ``1 - SSR / SST`` where SST is
    measured around the mean of *all* targets seen so far. Summing per-batch
    SST values computed around each batch's own mean would underestimate SST
    and give a wrong score as soon as more than one batch is used, so the
    running mean and SST are merged batch by batch instead.

    State variables:
        sum_squared_residual: running sum of ``(y_true - y_pred) ** 2``.
        sum_squared_total: running sum of squared deviations of ``y_true``
            from the running mean.
        mean_true: running mean of ``y_true``.
        count: number of elements seen so far.

    Args:
        name: Name under which the metric is reported.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, name='r2_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.sum_squared_residual = self.add_weight(name='ssr', initializer='zeros')
        self.sum_squared_total = self.add_weight(name='sst', initializer='zeros')
        self.mean_true = self.add_weight(name='mean_true', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values with the same number of elements.
            sample_weight: Accepted for API compatibility but not used.
        """
        # Flatten both tensors so that e.g. (batch,) vs (batch, 1) inputs are
        # compared element by element instead of broadcasting to (batch, batch).
        y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
        y_pred = tf.reshape(tf.cast(y_pred, tf.float32), [-1])

        batch_ssr = tf.reduce_sum(tf.square(y_true - y_pred))

        # Per-batch statistics of the targets; divide_no_nan keeps an empty
        # batch from producing NaN.
        batch_count = tf.cast(tf.size(y_true), tf.float32)
        batch_mean = tf.math.divide_no_nan(tf.reduce_sum(y_true), batch_count)
        batch_sst = tf.reduce_sum(tf.square(y_true - batch_mean))

        # Merge the batch statistics into the running ones (pairwise variance
        # merge): the extra term accounts for the gap between the two means.
        new_count = self.count + batch_count
        delta = batch_mean - self.mean_true
        batch_fraction = tf.math.divide_no_nan(batch_count, new_count)
        new_mean = self.mean_true + delta * batch_fraction
        new_sst = (
            self.sum_squared_total
            + batch_sst
            + tf.square(delta) * self.count * batch_fraction
        )

        # All new values are computed from the old state before any assignment.
        self.sum_squared_residual.assign_add(batch_ssr)
        self.sum_squared_total.assign(new_sst)
        self.mean_true.assign(new_mean)
        self.count.assign(new_count)

    def result(self):
        """Return ``1 - SSR / SST`` for everything accumulated so far."""
        # The small epsilon keeps the division finite when all targets are equal.
        return 1 - self.sum_squared_residual / (self.sum_squared_total + 1e-7)

    def reset_state(self):
        """Zero all accumulators.

        ``reset_state`` is the hook name used by current Keras versions;
        ``reset_states`` is the deprecated spelling.
        """
        self.sum_squared_residual.assign(0.0)
        self.sum_squared_total.assign(0.0)
        self.mean_true.assign(0.0)
        self.count.assign(0.0)

    def reset_states(self):
        """Deprecated alias of ``reset_state``, kept for existing callers."""
        self.reset_state()

# Track the custom R^2 metric while optimizing mean squared error.
# `model` is not created in this snippet; it is expected to exist already.
model.compile(loss='mse', optimizer='adam', metrics=[R2Score()])

其他指标 #

Mean 和 Sum #

python

import tensorflow as tf

# Mean and Sum aggregate a single stream of values through
# update_state(values, sample_weight=None). Metrics passed to
# compile(metrics=[...]) are instead updated with (y_true, y_pred), which does
# not match that signature, so Mean/Sum are driven directly here, as one would
# do in a custom training loop.
mean_metric = tf.keras.metrics.Mean(name='mean')
sum_metric = tf.keras.metrics.Sum(name='sum')

# Example stream of per-batch values (for instance, batch losses).
batch_values = [0.9, 0.7, 0.4]
for value in batch_values:
    mean_metric.update_state(value)
    sum_metric.update_state(value)

print(f"mean: {mean_metric.result().numpy():.4f}")
print(f"sum: {sum_metric.result().numpy():.4f}")

MeanMetricWrapper #

python

import tensorflow as tf

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of a batch as a scalar.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        A scalar float32 tensor: ``100 * mean(|y_true - y_pred| / |y_true|)``,
        with the denominator clamped to at least ``1e-7``.
    """
    # Cast both operands: a float32/float16 (or int/float) mix would make the
    # subtraction below fail with a dtype mismatch.
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    # Clamp the denominator so targets equal to zero do not divide by zero.
    diff = tf.abs((y_true - y_pred) / tf.maximum(tf.abs(y_true), 1e-7))
    return 100.0 * tf.reduce_mean(diff)

# Wrap the stateless function in a Mean-based metric reported as 'mape'.
mape = tf.keras.metrics.MeanMetricWrapper(name='mape', fn=mean_absolute_percentage_error)

# `model` is not created in this snippet; it is expected to exist already.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=[mape],
)

自定义指标 #

函数式自定义 #

python

import tensorflow as tf

def custom_metric(y_true, y_pred):
    """Return the mean relative absolute error of a batch as a scalar tensor.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / (|y_true| + 1e-7))``.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Labels may arrive as integers or with a different float width; without
    # the cast the subtraction below fails with a dtype mismatch.
    y_true = tf.cast(y_true, y_pred.dtype)
    # The small epsilon keeps the division finite for targets equal to zero.
    return tf.reduce_mean(tf.abs(y_true - y_pred) / (tf.abs(y_true) + 1e-7))

# A plain function taking (y_true, y_pred) can be passed directly as a metric.
# `model` is not created in this snippet; it is expected to exist already.
model.compile(loss='mse', optimizer='adam', metrics=[custom_metric])

类式自定义 #

python

import tensorflow as tf

class CustomMetric(tf.keras.metrics.Metric):
    """Streaming mean absolute error implemented as a stateful metric.

    Keeps a running sum of absolute errors and a running element count, and
    reports their ratio.

    Args:
        name: Name under which the metric is reported.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, name='custom_metric', **kwargs):
        super().__init__(name=name, **kwargs)
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values, broadcastable against ``y_true``.
            sample_weight: Accepted for API compatibility but not used.
        """
        # Cast both operands so integer labels or mixed float widths do not
        # make the subtraction fail with a dtype mismatch.
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        values = tf.abs(y_true - y_pred)
        self.total.assign_add(tf.reduce_sum(values))
        self.count.assign_add(tf.cast(tf.size(values), tf.float32))

    def result(self):
        """Return the mean absolute error so far (0 before any update)."""
        # divide_no_nan returns 0 instead of NaN while count is still zero.
        return tf.math.divide_no_nan(self.total, self.count)

    def reset_state(self):
        """Zero both accumulators.

        ``reset_state`` is the hook name used by current Keras versions;
        ``reset_states`` is the deprecated spelling.
        """
        self.total.assign(0.0)
        self.count.assign(0.0)

    def reset_states(self):
        """Deprecated alias of ``reset_state``, kept for existing callers."""
        self.reset_state()

# Pass an instance of the stateful metric class to compile().
# `model` is not created in this snippet; it is expected to exist already.
model.compile(loss='mse', optimizer='adam', metrics=[CustomMetric()])

指标使用示例 #

训练时监控 #

python

import tensorflow as tf

# 10-class classifier: 784 input features -> 64 ReLU units -> 10 softmax probabilities.
# The input shape is declared with an explicit Input object; passing `input_shape=`
# to the first layer is deprecated in Keras 3 and triggers a warning there.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top5_acc')
    ]
)

# x_train / y_train / x_val / y_val are not created in this snippet; they are
# expected to be prepared beforehand.
history = model.fit(
    x_train, y_train,
    epochs=10,
    validation_data=(x_val, y_val)
)

# history.history maps each recorded loss/metric name to its per-epoch values.
print(f"训练指标: {history.history.keys()}")

手动更新指标 #

python

import tensorflow as tf

# Metrics can be used on their own, outside of compile()/fit().
metric = tf.keras.metrics.MeanAbsoluteError()

y_true = tf.constant([[1.0], [2.0], [3.0]])
y_pred = tf.constant([[1.1], [1.9], [3.2]])

# Accumulate one batch, then read the aggregated value.
metric.update_state(y_true, y_pred)
print(f"MAE: {metric.result().numpy()}")

# Clear the accumulated state before reusing the metric.
# `reset_state()` replaces the deprecated `reset_states()` spelling.
metric.reset_state()

多输出模型指标 #

python

import tensorflow as tf

# Shared trunk: a 100-feature input followed by a 64-unit ReLU layer.
inputs = tf.keras.Input(shape=(100,))
hidden_layer = tf.keras.layers.Dense(64, activation='relu')
x = hidden_layer(inputs)

# Two heads on top of the trunk; the layer names double as the dictionary
# keys used for the per-output loss and metric configuration below.
classification_head = tf.keras.layers.Dense(10, activation='softmax', name='classification')
output_a = classification_head(x)
regression_head = tf.keras.layers.Dense(1, name='regression')
output_b = regression_head(x)

model = tf.keras.Model(inputs=inputs, outputs=[output_a, output_b])

# One loss and one metric list per named output.
per_output_loss = {
    'classification': 'sparse_categorical_crossentropy',
    'regression': 'mse',
}
per_output_metrics = {
    'classification': ['accuracy'],
    'regression': ['mae'],
}
model.compile(optimizer='adam', loss=per_output_loss, metrics=per_output_metrics)

下一步 #

现在你已经掌握了评估指标，接下来学习自定义层，了解如何创建自定义网络层！