性能指标 #
概述 #
性能指标用于量化评估模型的预测能力,选择合适的指标对于模型优化至关重要。
指标分类 #
| 类型 | 指标 | 适用任务 |
|---|---|---|
| 分类指标 | 准确率、精确率、召回率、F1 | 分类问题 |
| 回归指标 | MSE、MAE、R² | 回归问题 |
| 聚类指标 | 轮廓系数、ARI | 聚类问题 |
| 排序指标 | ROC-AUC、PR-AUC | 排序问题 |
分类指标 #
准确率(Accuracy) #
python
from sklearn.metrics import accuracy_score
import numpy as np
y_true = [0, 1, 1, 0, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 0, 1, 1]
# Fraction of samples whose predicted label equals the true label.
acc = accuracy_score(y_true, y_pred)
print(f"准确率: {acc:.4f}")
# normalize=False returns the raw number of correct predictions instead of a
# fraction, so the result is named as a count rather than "normalized".
n_correct = accuracy_score(y_true, y_pred, normalize=False)
print(f"正确预测数: {n_correct}")
混淆矩阵 #
python
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
cm = confusion_matrix(y_true, y_pred)
print("混淆矩阵:")
print(cm)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix')
精确率、召回率、F1 #
python
from sklearn.metrics import precision_score, recall_score, f1_score
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
print(f"精确率: {precision:.4f}")
print(f"召回率: {recall:.4f}")
print(f"F1分数: {f1:.4f}")
分类报告 #
python
from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred, target_names=['Class 0', 'Class 1']))
多分类指标 #
python
y_true_multi = [0, 1, 2, 0, 1, 2]
y_pred_multi = [0, 2, 1, 0, 0, 1]
precision_macro = precision_score(y_true_multi, y_pred_multi, average='macro')
precision_micro = precision_score(y_true_multi, y_pred_multi, average='micro')
precision_weighted = precision_score(y_true_multi, y_pred_multi, average='weighted')
print(f"Macro 精确率: {precision_macro:.4f}")
print(f"Micro 精确率: {precision_micro:.4f}")
print(f"Weighted 精确率: {precision_weighted:.4f}")
平均方式说明 #
| 平均方式 | 描述 |
|---|---|
| `binary` | 二分类(仅报告 `pos_label` 指定的正类结果) |
| `micro` | 全局计算(汇总所有类别的 TP、FP、FN 后再计算) |
| `macro` | 类别平均(各类别指标的未加权平均) |
| `weighted` | 加权平均(按各类别的样本数加权) |
ROC 曲线和 AUC #
ROC 曲线 #
python
from sklearn.metrics import roc_curve, auc
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
X, y = make_classification(n_samples=1000, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)
y_proba = model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.title('ROC Curve')
AUC 分数 #
python
from sklearn.metrics import roc_auc_score
auc_score = roc_auc_score(y_test, y_proba)
print(f"AUC: {auc_score:.4f}")
多分类 AUC #
python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Use dedicated names so the binary `model`, `X`, `y` and `y_proba` from the
# ROC example stay intact for the snippets that follow (PR curve, scorers,
# threshold search), which pair them with `X_test` / `y_test`.
iris = load_iris()
X_iris, y_iris = iris.data, iris.target
# max_iter is raised because the default may not converge on unscaled data.
iris_model = LogisticRegression(max_iter=1000)
iris_model.fit(X_iris, y_iris)
# Shape (n_samples, n_classes): one probability column per class.
y_proba_iris = iris_model.predict_proba(X_iris)
# Pass the 1-D class labels. With a binarized (multilabel-indicator) y_true,
# roc_auc_score takes the multilabel code path and ignores `multi_class`,
# so the "ovr" and "ovo" results would be the same computation.
auc_ovr = roc_auc_score(y_iris, y_proba_iris, multi_class='ovr')
auc_ovo = roc_auc_score(y_iris, y_proba_iris, multi_class='ovo')
print(f"OvR AUC: {auc_ovr:.4f}")
print(f"OvO AUC: {auc_ovo:.4f}")
PR 曲线 #
精确率-召回率曲线 #
python
from sklearn.metrics import precision_recall_curve, average_precision_score
precision, recall, thresholds = precision_recall_curve(y_test, y_proba)
ap = average_precision_score(y_test, y_proba)
plt.plot(recall, precision, label=f'PR curve (AP = {ap:.2f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()
plt.title('Precision-Recall Curve')
回归指标 #
均方误差(MSE) #
python
from sklearn.metrics import mean_squared_error
import numpy as np
y_true = [3.0, -0.5, 2.0, 7.0]
y_pred = [2.5, 0.0, 2.0, 8.0]
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
平均绝对误差(MAE) #
python
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_true, y_pred)
print(f"MAE: {mae:.4f}")
R² 分数 #
python
from sklearn.metrics import r2_score
r2 = r2_score(y_true, y_pred)
print(f"R²: {r2:.4f}")
解释方差分数 #
python
from sklearn.metrics import explained_variance_score
evs = explained_variance_score(y_true, y_pred)
print(f"解释方差分数: {evs:.4f}")
平均绝对百分比误差(MAPE) #
python
def mape(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) in percent.

    Args:
        y_true: Ground-truth target values (any array-like, e.g. list or
            numpy array).
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred| / |y_true|`` multiplied by 100.

    Raises:
        ValueError: If ``y_true`` contains a zero, for which the percentage
            error is undefined.
    """
    # Convert up front so plain Python lists work (list - list is a TypeError).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Dividing by a zero target would silently yield inf/nan.
    if np.any(y_true == 0):
        raise ValueError("MAPE is undefined when y_true contains zeros")
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
mape_value = mape(np.array(y_true), np.array(y_pred))
print(f"MAPE: {mape_value:.2f}%")
回归指标对比 #
| 指标 | 范围 | 最佳值 | 特点 |
|---|---|---|---|
| MSE | [0, ∞) | 0 | 对大误差敏感 |
| RMSE | [0, ∞) | 0 | 与原数据同单位 |
| MAE | [0, ∞) | 0 | 对异常值鲁棒 |
| R² | (-∞, 1] | 1 | 可解释性 |
| MAPE | [0, ∞) | 0 | 百分比误差 |
聚类指标 #
轮廓系数 #
python
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
X, _ = make_blobs(n_samples=300, centers=4, random_state=42)
kmeans = KMeans(n_clusters=4, random_state=42)
labels = kmeans.fit_predict(X)
score = silhouette_score(X, labels)
print(f"轮廓系数: {score:.4f}")
调整兰德指数(ARI) #
python
from sklearn.metrics import adjusted_rand_score
y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 0, 1, 1, 2, 2]
ari = adjusted_rand_score(y_true, y_pred)
print(f"ARI: {ari:.4f}")
标准化互信息(NMI) #
python
from sklearn.metrics import normalized_mutual_info_score
nmi = normalized_mutual_info_score(y_true, y_pred)
print(f"NMI: {nmi:.4f}")
同质性和完整性 #
python
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score
homogeneity = homogeneity_score(y_true, y_pred)
completeness = completeness_score(y_true, y_pred)
v_measure = v_measure_score(y_true, y_pred)
print(f"同质性: {homogeneity:.4f}")
print(f"完整性: {completeness:.4f}")
print(f"V-measure: {v_measure:.4f}")
多标签指标 #
多标签准确率 #
python
from sklearn.metrics import accuracy_score
y_true = [[0, 1], [1, 1], [0, 0]]
y_pred = [[0, 1], [1, 0], [0, 0]]
acc = accuracy_score(y_true, y_pred)
print(f"多标签准确率: {acc:.4f}")
Hamming 损失 #
python
from sklearn.metrics import hamming_loss
hl = hamming_loss(y_true, y_pred)
print(f"Hamming 损失: {hl:.4f}")
Jaccard 相似度 #
python
from sklearn.metrics import jaccard_score
js = jaccard_score(y_true, y_pred, average='samples')
print(f"Jaccard 相似度: {js:.4f}")
自定义评分器 #
创建自定义评分器 #
python
from sklearn.metrics import make_scorer
def custom_score(y_true, y_pred):
    """Return accuracy rescaled linearly from [0, 1] to [-1, 1].

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        ``2 * accuracy - 1``: 1.0 when every prediction is correct and -1.0
        when every prediction is wrong.
    """
    # Convert to arrays so `==` is element-wise; comparing two Python lists
    # would produce a single bool and a silently wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(y_true == y_pred) * 2 - 1
# greater_is_better=True: a higher custom_score means a better model.
custom_scorer = make_scorer(custom_score, greater_is_better=True)
from sklearn.model_selection import cross_val_score
# Cross-validate on the binary-classification training split. The bare `X` and
# `y` names are rebound by other examples (e.g. `X` by make_blobs), so they no
# longer form a matching feature/label pair at this point.
scores = cross_val_score(model, X_train, y_train, cv=5, scoring=custom_scorer)
使用预定义评分器 #
python
from sklearn.metrics import get_scorer
scorers = {
'accuracy': get_scorer('accuracy'),
'roc_auc': get_scorer('roc_auc'),
'f1': get_scorer('f1')
}
for name, scorer in scorers.items():
score = scorer(model, X_test, y_test)
print(f"{name}: {score:.4f}")
阈值优化 #
寻找最佳阈值 #
python
from sklearn.metrics import precision_recall_curve
precision, recall, thresholds = precision_recall_curve(y_test, y_proba)
# `precision` and `recall` have one more element than `thresholds`: the final
# point (precision=1, recall=0) has no associated threshold. Drop it so every
# F1 value lines up with a threshold and argmax can never index out of range.
f1_scores = 2 * precision[:-1] * recall[:-1] / (precision[:-1] + recall[:-1] + 1e-10)
best_threshold = thresholds[np.argmax(f1_scores)]
print(f"最佳阈值: {best_threshold:.4f}")
# Turn probabilities into hard labels using the F1-optimal cut-off.
y_pred_optimized = (y_proba >= best_threshold).astype(int)
Youden’s J 统计量 #
python
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
j_scores = tpr - fpr
best_threshold = thresholds[np.argmax(j_scores)]
print(f"Youden's J 最佳阈值: {best_threshold:.4f}")
指标可视化 #
多指标雷达图 #
python
import matplotlib.pyplot as plt
from math import pi
# Recompute predictions for the binary test split: `y_pred` has been reused by
# the regression, clustering and multi-label examples and no longer matches
# `y_test`. Assumes `model` is the binary classifier fitted on X_train.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]
# One value per radar axis; every metric here lies in [0, 1].
metrics = {
    'Accuracy': accuracy_score(y_test, y_pred),
    'Precision': precision_score(y_test, y_pred),
    'Recall': recall_score(y_test, y_pred),
    'F1': f1_score(y_test, y_pred),
    'AUC': roc_auc_score(y_test, y_proba),
}
categories = list(metrics.keys())
values = list(metrics.values())
values += values[:1]
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]
fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
ax.plot(angles, values, 'o-', linewidth=2)
ax.fill(angles, values, alpha=0.25)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories)
最佳实践 #
1. 选择合适的指标 #
| 场景 | 推荐指标 |
|---|---|
| 平衡分类 | 准确率 |
| 不平衡分类 | F1, AUC |
| 信息检索 | 精确率, 召回率 |
| 回归预测 | RMSE, MAE |
| 模型解释 | R² |
2. 考虑业务需求 #
python
if cost_false_positive > cost_false_negative:
metric = 'precision'
else:
metric = 'recall'
3. 多指标评估 #
python
from sklearn.model_selection import cross_validate
scoring = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
# 'precision', 'recall', 'f1' and 'roc_auc' are binary scorers by default, so
# evaluate on the binary training split. The bare `X` and `y` names are rebound
# by other examples and no longer form a matching feature/label pair here.
results = cross_validate(model, X_train, y_train, cv=5, scoring=scoring)
4. 验证集评估 #
python
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]
print(f"准确率: {accuracy_score(y_test, y_pred):.4f}")
print(f"AUC: {roc_auc_score(y_test, y_proba):.4f}")
下一步 #
掌握性能指标后,继续学习 超参数调优 了解如何优化模型参数!
最后更新:2026-04-04