性能指标 #

概述 #

性能指标用于量化评估模型的预测能力，选择合适的指标对于模型优化至关重要。

指标分类 #

类型	指标	适用任务
分类指标	准确率、精确率、召回率、F1	分类问题
回归指标	MSE、MAE、R²	回归问题
聚类指标	轮廓系数、ARI	聚类问题
排序指标	ROC-AUC、PR-AUC	基于预测分数的二分类/排序问题

分类指标 #

准确率（Accuracy） #

python

import numpy as np
from sklearn.metrics import accuracy_score

# Ground-truth and predicted labels for a small binary example.
y_true = [0, 1, 1, 0, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 0, 1, 1]

# Default (normalize=True): fraction of samples predicted correctly.
acc = accuracy_score(y_true, y_pred)
print(f"准确率: {acc:.4f}")

# normalize=False returns the *number* of correctly classified samples rather
# than a ratio, so the variable is named for what it actually holds.
n_correct = accuracy_score(y_true, y_pred, normalize=False)
print(f"正确预测数: {n_correct}")

混淆矩阵 #

python

import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true, y_pred)
print("混淆矩阵:", cm, sep="\n")

# Render the matrix as a heat map.
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix')

精确率、召回率、F1 #

python

from sklearn.metrics import f1_score, precision_score, recall_score

# Evaluate the three binary metrics on the same label pair, in a fixed order.
precision, recall, f1 = (
    metric_fn(y_true, y_pred)
    for metric_fn in (precision_score, recall_score, f1_score)
)

for label, value in (("精确率", precision), ("召回率", recall), ("F1分数", f1)):
    print(f"{label}: {value:.4f}")

分类报告 #

python

from sklearn.metrics import classification_report

# Display names for the classes, in label order.
class_names = ['Class 0', 'Class 1']

# Text summary of per-class precision, recall, F1 and support.
report = classification_report(y_true, y_pred, target_names=class_names)
print(report)

多分类指标 #

python

# Three-class toy example.
y_true_multi = [0, 1, 2, 0, 1, 2]
y_pred_multi = [0, 2, 1, 0, 0, 1]

# Same labels, three different ways of averaging the per-class precision.
precision_macro, precision_micro, precision_weighted = (
    precision_score(y_true_multi, y_pred_multi, average=mode)
    for mode in ('macro', 'micro', 'weighted')
)

print(f"Macro 精确率: {precision_macro:.4f}")
print(f"Micro 精确率: {precision_micro:.4f}")
print(f"Weighted 精确率: {precision_weighted:.4f}")

平均方式说明 #

平均方式	描述
`binary`	仅计算正类（`pos_label`）的指标，只适用于二分类
`micro`	先汇总所有类别的 TP、FP、FN，再全局计算指标
`macro`	分别计算每个类别的指标，再取未加权平均
`weighted`	分别计算每个类别的指标，再按各类别样本数（support）加权平均

ROC 曲线和 AUC #

ROC 曲线 #

python

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import train_test_split

# Synthetic binary classification data, split into train and test parts.
X, y = make_classification(n_samples=1000, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression().fit(X_train, y_train)

# Keep only column 1 of the predicted probabilities (the positive class).
y_proba = model.predict_proba(X_test)[:, 1]

# ROC points for every candidate threshold, and the area under that curve.
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

roc_label = f'ROC curve (AUC = {roc_auc:.2f})'
plt.plot(fpr, tpr, label=roc_label)
plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()

AUC 分数 #

python

from sklearn.metrics import roc_auc_score

# Compute the area under the ROC curve directly from labels and scores.
auc_score = roc_auc_score(y_true=y_test, y_score=y_proba)
print(f"AUC: {auc_score:.4f}")

多分类 AUC #

python

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Dedicated names are used for the Iris objects so that the binary-classification
# `X`, `y`, `model` and `y_proba` used by the other sections are not overwritten.
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

# A larger max_iter helps the default solver converge on these unscaled features.
iris_model = LogisticRegression(max_iter=1000)
iris_model.fit(X_iris, y_iris)

# One probability column per class, as required for multiclass AUC.
y_proba_iris = iris_model.predict_proba(X_iris)

# Pass the 1-D class labels, not a binarized indicator matrix: `multi_class`
# is only used for multiclass targets. With an indicator matrix the input is
# treated as multilabel and 'ovr' / 'ovo' would report the same number.
auc_ovr = roc_auc_score(y_iris, y_proba_iris, multi_class='ovr')
auc_ovo = roc_auc_score(y_iris, y_proba_iris, multi_class='ovo')

print(f"OvR AUC: {auc_ovr:.4f}")
print(f"OvO AUC: {auc_ovo:.4f}")

PR 曲线 #

精确率-召回率曲线 #

python

from sklearn.metrics import average_precision_score, precision_recall_curve

# Average precision summarises the PR curve as a single number.
ap = average_precision_score(y_test, y_proba)

# Precision/recall pairs for every candidate decision threshold.
precision, recall, thresholds = precision_recall_curve(y_test, y_proba)

pr_label = f'PR curve (AP = {ap:.2f})'
plt.plot(recall, precision, label=pr_label)
plt.title('Precision-Recall Curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()

回归指标 #

均方误差（MSE） #

python

import numpy as np
from sklearn.metrics import mean_squared_error

# Small regression example: targets and predictions.
y_true = [3.0, -0.5, 2.0, 7.0]
y_pred = [2.5, 0.0, 2.0, 8.0]

# MSE, and its square root (RMSE).
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)

for metric_name, metric_value in (("MSE", mse), ("RMSE", rmse)):
    print(f"{metric_name}: {metric_value:.4f}")

平均绝对误差（MAE） #

python

from sklearn.metrics import mean_absolute_error

# Mean absolute error between the targets and the predictions.
mae = mean_absolute_error(y_true=y_true, y_pred=y_pred)
print(f"MAE: {mae:.4f}")

R² 分数 #

python

from sklearn.metrics import r2_score

# Coefficient of determination for the predictions.
r2 = r2_score(y_true=y_true, y_pred=y_pred)
print(f"R²: {r2:.4f}")

解释方差分数 #

python

from sklearn.metrics import explained_variance_score

# Explained variance score for the predictions.
evs = explained_variance_score(y_true=y_true, y_pred=y_pred)
print(f"解释方差分数: {evs:.4f}")

平均绝对百分比误差（MAPE） #

python

def mape(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Args:
        y_true: Ground-truth values (any array-like, e.g. list or ndarray).
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` as a float.

    Note:
        The error is relative to ``y_true``, so a zero in ``y_true`` divides
        by zero and makes the result ``inf`` or ``nan``.
    """
    # Coerce to float arrays so plain Python sequences work too; subtracting
    # two lists would otherwise raise a TypeError.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Convert the Python lists to arrays before handing them to mape().
true_arr, pred_arr = np.array(y_true), np.array(y_pred)
mape_value = mape(true_arr, pred_arr)
print(f"MAPE: {mape_value:.2f}%")

回归指标对比 #

指标	范围	最佳值	特点
MSE	[0, ∞)	0	对大误差敏感
RMSE	[0, ∞)	0	与原数据同单位
MAE	[0, ∞)	0	对异常值鲁棒
R²	(-∞, 1]	1	表示模型解释的方差比例，可解释性强；模型差于均值预测时为负
MAPE	[0, ∞)	0	百分比误差；真实值为 0 时无定义

聚类指标 #

轮廓系数 #

python

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

# Store the blob data under its own name: rebinding `X` here would leave it
# with 300 rows, which no longer matches `y` in the cross-validation snippets
# that use `X` and `y` together.
X_blobs, _ = make_blobs(n_samples=300, centers=4, random_state=42)

# Cluster into as many groups as blobs were generated.
kmeans = KMeans(n_clusters=4, random_state=42)
labels = kmeans.fit_predict(X_blobs)

# Silhouette needs only the data and the assigned labels (no ground truth).
score = silhouette_score(X_blobs, labels)
print(f"轮廓系数: {score:.4f}")

调整兰德指数（ARI） #

python

from sklearn.metrics import adjusted_rand_score

# Reference cluster assignment and a predicted assignment (identical here).
y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 0, 1, 1, 2, 2]

# Adjusted Rand index between the two labelings.
ari = adjusted_rand_score(labels_true=y_true, labels_pred=y_pred)
print(f"ARI: {ari:.4f}")

标准化互信息（NMI） #

python

from sklearn.metrics import normalized_mutual_info_score

# Normalized mutual information between the two labelings.
nmi = normalized_mutual_info_score(labels_true=y_true, labels_pred=y_pred)
print(f"NMI: {nmi:.4f}")

同质性和完整性 #

python

from sklearn.metrics import completeness_score, homogeneity_score, v_measure_score

# Compute the three related clustering scores on the same pair of labelings.
homogeneity, completeness, v_measure = (
    score_fn(y_true, y_pred)
    for score_fn in (homogeneity_score, completeness_score, v_measure_score)
)

print(f"同质性: {homogeneity:.4f}")
print(f"完整性: {completeness:.4f}")
print(f"V-measure: {v_measure:.4f}")

多标签指标 #

多标签准确率 #

python

from sklearn.metrics import accuracy_score

# Multilabel data: one inner list per sample, one 0/1 entry per label.
y_true = [[0, 1], [1, 1], [0, 0]]
y_pred = [[0, 1], [1, 0], [0, 0]]

# Accuracy computed on the multilabel rows.
acc = accuracy_score(y_true=y_true, y_pred=y_pred)
print(f"多标签准确率: {acc:.4f}")

Hamming 损失 #

python

from sklearn.metrics import hamming_loss

# Hamming loss between the true and predicted label rows.
hl = hamming_loss(y_true=y_true, y_pred=y_pred)
print(f"Hamming 损失: {hl:.4f}")

Jaccard 相似度 #

python

from sklearn.metrics import jaccard_score

# Jaccard score with average='samples'.
averaging = 'samples'
js = jaccard_score(y_true, y_pred, average=averaging)
print(f"Jaccard 相似度: {js:.4f}")

自定义评分器 #

创建自定义评分器 #

python

from sklearn.metrics import make_scorer

def custom_score(y_true, y_pred):
    """Return accuracy rescaled from [0, 1] to [-1, 1].

    Args:
        y_true: Ground-truth labels (any array-like).
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        ``2 * accuracy - 1``: 1.0 when every prediction matches, -1.0 when
        none do.
    """
    # Coerce to arrays so `==` compares element-wise; comparing two plain
    # lists yields a single bool and would collapse the score to +1 or -1.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(y_true == y_pred) * 2 - 1

# Wrap the metric as a scorer; higher values of custom_score are better.
custom_scorer = make_scorer(custom_score, greater_is_better=True)

from sklearn.model_selection import cross_val_score

# Cross-validate on the training split. The names `X` and `y` are reused for
# other datasets elsewhere in this document, so they are not guaranteed to
# belong together (or even have the same length) at this point.
scores = cross_val_score(model, X_train, y_train, cv=5, scoring=custom_scorer)

使用预定义评分器 #

python

from sklearn.metrics import get_scorer

# Look up each built-in scorer by its string name.
scorers = {
    scorer_name: get_scorer(scorer_name)
    for scorer_name in ('accuracy', 'roc_auc', 'f1')
}

# A scorer is called with (estimator, X, y) and returns a single number.
for name, scorer in scorers.items():
    score = scorer(model, X_test, y_test)
    print(f"{name}: {score:.4f}")

阈值优化 #

寻找最佳阈值 #

python

from sklearn.metrics import precision_recall_curve

precision, recall, thresholds = precision_recall_curve(y_test, y_proba)

# `precision` and `recall` contain one more element than `thresholds`: the
# final point (precision=1, recall=0) has no threshold. Drop it so every F1
# value lines up with a threshold and argmax can never index out of range.
precision_at_thr = precision[:-1]
recall_at_thr = recall[:-1]

# The small constant avoids division by zero when precision and recall are both 0.
f1_scores = 2 * precision_at_thr * recall_at_thr / (precision_at_thr + recall_at_thr + 1e-10)
best_threshold = thresholds[np.argmax(f1_scores)]

print(f"最佳阈值: {best_threshold:.4f}")

# Re-label the test samples using the F1-optimal threshold.
y_pred_optimized = (y_proba >= best_threshold).astype(int)

Youden’s J 统计量 #

python

from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_test, y_proba)

# Youden's J = TPR - FPR; pick the threshold where it is largest.
j_scores = np.subtract(tpr, fpr)
best_idx = j_scores.argmax()
best_threshold = thresholds[best_idx]

print(f"Youden's J 最佳阈值: {best_threshold:.4f}")

指标可视化 #

多指标雷达图 #

python

import matplotlib.pyplot as plt
from math import pi

# Recompute predictions for the binary test split inside this snippet, so the
# chart does not depend on whatever `model`, `y_pred` or `y_proba` were last
# bound to in other sections (`y_pred` in particular is reused for other data).
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

metrics = {
    'Accuracy': accuracy_score(y_test, y_pred),
    'Precision': precision_score(y_test, y_pred),
    'Recall': recall_score(y_test, y_pred),
    'F1': f1_score(y_test, y_pred),
    'AUC': roc_auc_score(y_test, y_proba)
}

categories = list(metrics.keys())
values = list(metrics.values())
values += values[:1]  # repeat the first value to close the polygon

# One evenly spaced angle per metric, plus the first again to close the loop.
angles = [n / float(len(categories)) * 2 * pi for n in range(len(categories))]
angles += angles[:1]

fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
ax.plot(angles, values, 'o-', linewidth=2)
ax.fill(angles, values, alpha=0.25)
ax.set_xticks(angles[:-1])  # skip the duplicated closing angle
ax.set_xticklabels(categories)

最佳实践 #

1. 选择合适的指标 #

场景	推荐指标
平衡分类	准确率
不平衡分类	F1, AUC
信息检索	精确率, 召回率
回归预测	RMSE, MAE
模型解释	R²

2. 考虑业务需求 #

python

# Example misclassification costs; replace them with values from your own
# business context. They are defined here so the snippet runs on its own.
cost_false_positive = 10.0
cost_false_negative = 1.0

# When false positives are the more expensive mistake, optimise precision;
# otherwise (including a tie) optimise recall.
if cost_false_positive > cost_false_negative:
    metric = 'precision'
else:
    metric = 'recall'

3. 多指标评估 #

python

from sklearn.model_selection import cross_validate

# 'precision', 'recall', 'f1' and 'roc_auc' are binary scorers, so evaluate on
# the binary training split. `X` and `y` are reused for other datasets in this
# document and may not match each other at this point.
scoring = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
results = cross_validate(model, X_train, y_train, cv=5, scoring=scoring)

4. 测试集评估 #

python

# Positive-class probabilities and hard predictions for the held-out samples.
y_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
test_auc = roc_auc_score(y_test, y_proba)

print(f"准确率: {test_accuracy:.4f}")
print(f"AUC: {test_auc:.4f}")

下一步 #

掌握性能指标后，继续学习超参数调优，了解如何优化模型参数！