支持向量机 #
概述 #
支持向量机(Support Vector Machine, SVM)是一种强大的监督学习算法,通过寻找最优超平面来实现分类或回归。
核心思想 #
SVM 寻找能够最大化类别间隔的超平面:
```text
类别 B
      ○ ○ ○
    ○ ○
─────────────────── 支持向量边界
─────────────────── 最大间隔超平面
─────────────────── 支持向量边界
    × ×
      × × ×
类别 A
```
SVM 类型 #
| 类型 | 用途 | 类 |
|---|---|---|
| 线性 SVM | 线性可分数据 | LinearSVC, LinearSVR |
| 非线性 SVM | 复杂边界数据 | SVC, SVR |
线性 SVM #
线性分类 #
python
from sklearn.svm import LinearSVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic binary-classification problem: 10 features, 5 informative.
X, y = make_classification(
    n_samples=1000, n_features=10, n_informative=5,
    n_redundant=0, random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit a linear-kernel SVM classifier and report held-out accuracy.
clf = LinearSVC(dual='auto', random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(f"准确率: {accuracy_score(y_test, y_pred):.4f}")
线性回归 #
python
from sklearn.svm import LinearSVR
from sklearn.datasets import make_regression
from sklearn.metrics import r2_score

# Synthetic regression target with Gaussian noise.
X, y = make_regression(n_samples=1000, n_features=10, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Linear support-vector regressor; evaluate R² on the held-out split.
reg = LinearSVR(dual='auto', random_state=42)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
print(f"R²: {r2_score(y_test, y_pred):.4f}")
参数说明 #
| 参数 | 描述 | 默认值 |
|---|---|---|
| `C` | 正则化参数 | 1.0 |
| `loss` | 损失函数 | 'squared_hinge' |
| `penalty` | 正则化类型 | 'l2' |
| `dual` | 对偶问题 | 'auto' |
非线性 SVM #
使用核函数 #
python
from sklearn.svm import SVC

# Generate a fresh 2-feature dataset and split it.
# Fix: the original fit on X_train/y_train left over from an earlier,
# unrelated dataset instead of the data generated here.
X, y = make_classification(
    n_samples=500, n_features=2, n_redundant=0,
    n_informative=2, n_clusters_per_class=1, random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# The RBF kernel allows a non-linear decision boundary.
clf = SVC(kernel='rbf', C=1.0, gamma='scale')
clf.fit(X_train, y_train)
print(f"准确率: {clf.score(X_test, y_test):.4f}")
核函数类型 #
| 核函数 | 公式 | 适用场景 |
|---|---|---|
| `linear` | K(x, x') = x · x' | 线性可分 |
| `poly` | K(x, x') = (γ x·x' + r)^d | 多项式关系 |
| `rbf` | K(x, x') = exp(-γ‖x - x'‖²) | 复杂边界 |
| `sigmoid` | K(x, x') = tanh(γ x·x' + r) | 神经网络类似 |
核函数对比 #
python
# Compare held-out accuracy across the four built-in kernels.
for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
    model = SVC(kernel=kernel, random_state=42)
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    print(f"{kernel}: {score:.4f}")
核函数详解 #
RBF 核 #
python
# RBF (Gaussian) kernel with explicit regularization and kernel width.
clf_rbf = SVC(kernel='rbf', C=1.0, gamma='scale')
clf_rbf.fit(X_train, y_train)
# Three ways to set gamma: 'auto', 'scale', or an explicit float value.
clf_auto = SVC(kernel='rbf', gamma='auto')
clf_scale = SVC(kernel='rbf', gamma='scale')
clf_custom = SVC(kernel='rbf', gamma=0.1)
gamma 参数影响 #
python
import matplotlib.pyplot as plt
import numpy as np

# Sweep gamma over five orders of magnitude and record test accuracy.
gamma_grid = [0.01, 0.1, 1, 10, 100]
accuracies = []
for gamma in gamma_grid:
    model = SVC(kernel='rbf', gamma=gamma)
    model.fit(X_train, y_train)
    accuracies.append(model.score(X_test, y_test))

# Log-scaled x-axis makes the under-/over-fitting transition visible.
plt.semilogx(gamma_grid, accuracies, 'o-')
plt.xlabel('Gamma')
plt.ylabel('Accuracy')
多项式核 #
python
# Degree-3 polynomial kernel; coef0 shifts the kernel so higher-order
# terms contribute relative to lower-order ones.
clf_poly = SVC(
    kernel='poly',
    degree=3,
    gamma='scale',
    coef0=1,
)
clf_poly.fit(X_train, y_train)
正则化参数 C #
C 的作用 #
| C 值 | 效果 |
|---|---|
| 大 | 严格分类,可能过拟合 |
| 小 | 允许误分类,可能欠拟合 |
python
# Train/test accuracy as a function of regularization strength C:
# large C fits the training set tightly, small C regularizes harder.
for C in [0.01, 0.1, 1, 10, 100]:
    model = SVC(kernel='rbf', C=C)
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    print(f"C={C}: Train={train_score:.4f}, Test={test_score:.4f}")
多分类 #
策略 #
| 策略 | 描述 |
|---|---|
| `ovr` | 一对多(One-vs-Rest) |
| `ovo` | 一对一(One-vs-One) |
python
from sklearn.datasets import load_iris
# Iris has 3 classes, so the two decision-function shapes differ.
iris = load_iris()
X, y = iris.data, iris.target
# 'ovr' yields one score per class; 'ovo' one per pair of classes
# (n_classes * (n_classes - 1) / 2 columns) -- see sklearn SVC docs.
clf_ovr = SVC(kernel='rbf', decision_function_shape='ovr')
clf_ovo = SVC(kernel='rbf', decision_function_shape='ovo')
clf_ovr.fit(X, y)
clf_ovo.fit(X, y)
print("OVR decision shape:", clf_ovr.decision_function(X[:5]).shape)
print("OVO decision shape:", clf_ovo.decision_function(X[:5]).shape)
SVM 回归 #
SVR 基本使用 #
python
from sklearn.svm import SVR
# Single-feature noisy regression problem.
X, y = make_regression(n_samples=500, n_features=1, noise=10, random_state=42)
# epsilon defines the tube within which prediction errors are not penalized.
svr = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr.fit(X, y)
# NOTE(review): scored on the training data itself -- no held-out split here.
print(f"R²: {svr.score(X, y):.4f}")
epsilon 参数 #
python
# Fix: split the SVR data generated above; the original loop trained on
# X_train/y_train left over from an earlier, incompatible dataset.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Larger epsilon widens the no-penalty tube, giving a smoother fit.
epsilons = [0.01, 0.1, 0.5, 1.0]
for eps in epsilons:
    svr = SVR(kernel='rbf', epsilon=eps)
    svr.fit(X_train, y_train)
    print(f"ε={eps}: R²={svr.score(X_test, y_test):.4f}")
决策边界可视化 #
2D 可视化 #
python
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Two interleaving half-moons: a classic non-linearly-separable benchmark.
X, y = make_moons(n_samples=200, noise=0.2, random_state=42)
clf = SVC(kernel='rbf', C=1.0, gamma='scale')
clf.fit(X, y)

# Evaluate the classifier on a dense grid covering the data (plus a margin)
# and shade the predicted class regions.
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.02),
    np.arange(y_min, y_max, 0.02),
)
grid_points = np.c_[xx.ravel(), yy.ravel()]
Z = clf.predict(grid_points).reshape(xx.shape)

cmap = ListedColormap(['red', 'blue'])
plt.contourf(xx, yy, Z, alpha=0.4, cmap=cmap)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap)
plt.title('SVM Decision Boundary (RBF Kernel)')
支持向量可视化 #
python
# Overlay hollow black circles on the training points that are support vectors.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', alpha=0.5)
sv = clf.support_vectors_
plt.scatter(sv[:, 0], sv[:, 1], s=100, facecolors='none', edgecolors='k')
plt.title('Support Vectors')
print(f"支持向量数量: {len(clf.support_vectors_)}")
数据预处理 #
特征缩放的重要性 #
python
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# SVMs are distance-based, so standardize features before fitting;
# a Pipeline guarantees the scaler is fit on training data only.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf')),
])
pipe.fit(X_train, y_train)
print(f"准确率: {pipe.score(X_test, y_test):.4f}")
不缩放 vs 缩放 #
python
# Baseline: RBF SVM on raw (unscaled) features.
clf_no_scale = SVC(kernel='rbf')
clf_no_scale.fit(X_train, y_train)
score_no_scale = clf_no_scale.score(X_test, y_test)

# Same model preceded by standardization.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf')),
])
pipe.fit(X_train, y_train)
score_scaled = pipe.score(X_test, y_test)

print(f"不缩放: {score_no_scale:.4f}")
print(f"缩放后: {score_scaled:.4f}")
超参数调优 #
GridSearchCV #
python
from sklearn.model_selection import GridSearchCV

# Exhaustive search over the C/gamma/kernel grid with 5-fold CV,
# parallelized across all available cores.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.01, 0.1, 1],
    'kernel': ['rbf', 'poly'],
}
grid_search = GridSearchCV(
    SVC(),
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)
print(f"最佳参数: {grid_search.best_params_}")
print(f"最佳分数: {grid_search.best_score_:.4f}")
RandomizedSearchCV #
python
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform

# Draw 50 random (C, gamma) pairs from log-uniform distributions instead
# of enumerating a fixed grid -- often finds good values with fewer fits.
param_dist = {
    'C': loguniform(1e-1, 1e2),
    'gamma': loguniform(1e-3, 1e1),
    'kernel': ['rbf'],
}
random_search = RandomizedSearchCV(
    SVC(),
    param_dist,
    n_iter=50,
    cv=5,
    random_state=42,
)
random_search.fit(X_train, y_train)
大规模数据 #
使用 LinearSVC #
python
from sklearn.svm import LinearSVC
# liblinear-based linear SVM scales to large sample counts far better than
# kernel SVC; X_train_large / y_train_large are placeholders for a big dataset.
clf = LinearSVC(dual='auto', random_state=42)
clf.fit(X_train_large, y_train_large)
使用 SGDClassifier #
python
from sklearn.linear_model import SGDClassifier
# loss='hinge' makes SGD optimize a linear-SVM objective via stochastic
# gradient descent -- suitable for very large or streaming datasets.
clf = SGDClassifier(loss='hinge', random_state=42)
clf.fit(X_train, y_train)
类别不平衡 #
使用 class_weight #
python
# 'balanced' reweights classes inversely proportional to their frequencies.
clf = SVC(kernel='rbf', class_weight='balanced')
clf.fit(X_train, y_train)
手动设置权重 #
python
# Penalize mistakes on class 1 ten times as heavily as on class 0.
class_weights = {0: 1, 1: 10}
clf = SVC(kernel='rbf', class_weight=class_weights)
实战示例 #
手写数字识别 #
python
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Handwritten-digit images flattened into feature vectors.
digits = load_digits()
X, y = digits.data, digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Scale the pixel features, then fit an RBF SVM with pre-tuned C and gamma.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf', C=10, gamma=0.01)),
])
pipe.fit(X_train, y_train)
print(f"准确率: {pipe.score(X_test, y_test):.4f}")
癌症诊断 #
python
from sklearn.datasets import load_breast_cancer

# Binary tumor classification; class_weight='balanced' compensates for
# the class imbalance in the labels.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf', class_weight='balanced')),
])
pipe.fit(X_train, y_train)
print(f"准确率: {pipe.score(X_test, y_test):.4f}")
SVM 优缺点 #
优点 #
| 优点 | 描述 |
|---|---|
| 高维有效 | 在高维空间表现良好 |
| 内存高效 | 只使用支持向量 |
| 核函数灵活 | 可适应不同数据分布 |
| 泛化能力强 | 最大间隔原则 |
缺点 #
| 缺点 | 描述 |
|---|---|
| 大数据慢 | 训练时间复杂度高 |
| 参数敏感 | 需要调参 |
| 噪声敏感 | 对噪声和异常值敏感 |
| 可解释性差 | 黑盒模型 |
最佳实践 #
1. 特征缩放 #
python
# Always scale inside a Pipeline so that, under cross-validation, the
# scaler is fit only on the training fold.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC()),
])
2. 选择核函数 #
python
# Rule of thumb: with more features than samples the data is often already
# linearly separable, so prefer a linear kernel; otherwise try RBF first.
kernel = 'linear' if n_features > n_samples else 'rbf'
3. 调参顺序 #
python
# Tune C first (overall model complexity), then gamma (RBF kernel width).
C_values = [0.1, 1, 10, 100]
gamma_values = ['scale', 'auto', 0.01, 0.1, 1]
4. 处理不平衡 #
python
clf = SVC(class_weight='balanced')
下一步 #
掌握 SVM 后,继续学习 集成方法 了解更强大的模型组合技术!
最后更新:2026-04-04