PyTorch 迁移学习 #
迁移学习简介 #
迁移学习是一种机器学习技术,将在一个任务上学到的知识应用到另一个相关任务上。
text
┌─────────────────────────────────────────────────────────────┐
│ 迁移学习核心思想 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 传统训练: │
│ 从零开始 ──► 随机初始化 ──► 大量数据训练 │
│ 问题:数据需求大、训练时间长 │
│ │
│ 迁移学习: │
│ 预训练模型 ──► 加载权重 ──► 微调/特征提取 │
│ 优势:数据需求小、训练速度快、效果更好 │
│ │
│ 为什么有效? │
│ - 浅层学习通用特征(边缘、纹理) │
│ - 深层学习特定特征(物体部件) │
│ - 这些特征可以迁移到新任务 │
│ │
└─────────────────────────────────────────────────────────────┘
torchvision 预训练模型 #
查看可用模型 #
python
import torchvision.models as models

# torchvision ships ImageNet-pretrained weights for each architecture
# listed below (the library offers many more variants as well).
print("可用的分类模型:")
classification_models = [
    'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
    'vgg11', 'vgg13', 'vgg16', 'vgg19',
    'densenet121', 'densenet169', 'densenet201',
    'mobilenet_v2', 'mobilenet_v3_small', 'mobilenet_v3_large',
    'efficientnet_b0', 'efficientnet_b1',
    'vit_b_16', 'vit_b_32',
    'convnext_tiny', 'convnext_small', 'convnext_base'
]
for arch in classification_models:
    print(f" - {arch}")
加载预训练模型 #
python
import torch
import torchvision.models as models

# Load architectures with pretrained ImageNet weights via the modern
# `weights=` enum API (successor of the old `pretrained=True` flag).
resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# IMAGENET1K_V2: same architecture, newer training recipe.
resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
mobilenet = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
efficientnet = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Smoke test: ImageNet classifiers take a 3x224x224 image batch.
x = torch.randn(1, 3, 224, 224)
output = resnet18(x)
print(f"ResNet18 输出: {output.shape}")
模型结构查看 #
python
import torchvision.models as models

# Inspect a pretrained ResNet18: printing the module shows the layer tree.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
print("ResNet18 结构:")
print(model)
print("\n最后一层:")
print(model.fc)  # the final fully connected classification head
print("\n模型参数统计:")
# Tally both counts in a single pass over the parameters.
total_params = 0
trainable_params = 0
for p in model.parameters():
    total_params += p.numel()
    if p.requires_grad:
        trainable_params += p.numel()
print(f"总参数: {total_params:,}")
print(f"可训练参数: {trainable_params:,}")
迁移学习策略 #
策略一:特征提取 #
text
┌─────────────────────────────────────────────────────────────┐
│ 特征提取策略 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 冻结预训练层,只训练分类头: │
│ │
│ ┌─────────────────────────────────────┐ │
│ │ 预训练网络(冻结) │ │
│ │ ┌─────────────────────────────┐ │ │
│ │ │ Conv1 ──► Conv2 ──► ... │ │ │
│ │ │ ↓ ↓ │ │ │
│ │ │ 冻结 冻结 │ │ │
│ │ └─────────────────────────────┘ │ │
│ └─────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────┐ │
│ │ 新分类头(训练) │ │
│ │ Linear ──► Output │ │
│ └─────────────────────────────────────┘ │
│ │
│ 适用场景:数据量小,任务相似 │
│ │
└─────────────────────────────────────────────────────────────┘
python
import torch
import torch.nn as nn
import torchvision.models as models

# Feature extraction: freeze the pretrained backbone, train a new head only.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.requires_grad_(False)  # same effect as looping over model.parameters()

# Replacing fc afterwards gives the new layer fresh, trainable parameters.
in_feats = model.fc.in_features
model.fc = nn.Linear(in_feats, 10)

print("可训练参数:")
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(f" {name}: {param.shape}")

# Forward pass works exactly as before, now emitting 10 logits per image.
x = torch.randn(32, 3, 224, 224)
output = model(x)
print(f"\n输出形状: {output.shape}")
策略二:微调 #
text
┌─────────────────────────────────────────────────────────────┐
│ 微调策略 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 解冻部分或全部层,一起训练: │
│ │
│ ┌─────────────────────────────────────┐ │
│ │ 预训练网络(解冻) │ │
│ │ ┌─────────────────────────────┐ │ │
│ │ │ Conv1 ──► Conv2 ──► ... │ │ │
│ │ │ ↓ ↓ │ │ │
│ │ │ 小lr 小lr │ │ │
│ │ └─────────────────────────────┘ │ │
│ └─────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────┐ │
│ │ 新分类头(训练) │ │
│ │ Linear ──► Output │ │
│ │ 大学习率 │ │
│ └─────────────────────────────────────┘ │
│ │
│ 适用场景:数据量中等,任务差异较大 │
│ │
└─────────────────────────────────────────────────────────────┘
python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 10)

# Phase 1: warm up the new head alone — freeze everything, re-enable fc.
for param in model.parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True
optimizer = optim.Adam([
    {'params': model.fc.parameters(), 'lr': 1e-3}
], lr=1e-3)

# Phase 2: fine-tune the whole network with layer-wise learning rates
# (small for early generic layers, large for the fresh head).
# This deliberately replaces the phase-1 optimizer.
for param in model.parameters():
    param.requires_grad = True
optimizer = optim.Adam([
    {'params': model.conv1.parameters(), 'lr': 1e-5},
    # FIX: bn1 was missing from the param groups, so its (trainable)
    # parameters would never have been updated by the optimizer.
    {'params': model.bn1.parameters(), 'lr': 1e-5},
    {'params': model.layer1.parameters(), 'lr': 1e-5},
    {'params': model.layer2.parameters(), 'lr': 1e-4},
    {'params': model.layer3.parameters(), 'lr': 1e-4},
    {'params': model.layer4.parameters(), 'lr': 1e-4},
    {'params': model.fc.parameters(), 'lr': 1e-3}
])
策略三:渐进式解冻 #
python
import torch
import torch.nn as nn
import torchvision.models as models

# Pretrained backbone with a freshly initialized 10-class head.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, 10)
def freeze_all(model):
    """Freeze every parameter in `model`."""
    for param in model.parameters():
        param.requires_grad = False

def unfreeze_layer(model, layer_name):
    """Unfreeze all parameters whose qualified name contains `layer_name`."""
    for name, param in model.named_parameters():
        if layer_name in name:
            param.requires_grad = True

def progressive_unfreeze(model, epoch):
    """Set the exact trainable parameter set for the given epoch.

    Schedule: epochs 0-4 train only the head ('fc'); 5-9 additionally
    'layer4'; 10-14 additionally 'layer3'; 15+ the whole network.

    FIX: the state is recomputed from scratch on every call, so the
    function is idempotent and correct for any epoch.  The original
    version only unfroze the layer for the current phase and relied on
    requires_grad flags left behind by earlier calls — invoked fresh at,
    say, epoch 7 it would have left the head frozen.
    """
    if epoch >= 15:
        for param in model.parameters():
            param.requires_grad = True
        return
    freeze_all(model)
    unfreeze_layer(model, 'fc')
    if epoch >= 5:
        unfreeze_layer(model, 'layer4')
    if epoch >= 10:
        unfreeze_layer(model, 'layer3')
# Walk the schedule and report how many parameters are trainable per epoch.
for epoch in range(20):
    progressive_unfreeze(model, epoch)
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Epoch {epoch}: 可训练参数 {trainable:,}")
不同模型架构迁移 #
VGG 迁移 #
python
import torch
import torch.nn as nn
import torchvision.models as models

# VGG keeps its conv backbone in `.features` and the FC head in `.classifier`.
vgg = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Freeze only the convolutional feature extractor.
vgg.features.requires_grad_(False)

# classifier[6] is the final Linear (4096 in); swap it for a 10-class output.
vgg.classifier[6] = nn.Linear(4096, 10)

print("VGG16 分类器:")
print(vgg.classifier)
ResNet 迁移 #
python
import torch
import torch.nn as nn
import torchvision.models as models

resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

# Freeze the whole pretrained network first.
resnet.requires_grad_(False)

# Replace the head with a small MLP; fresh layers are trainable by default.
in_feats = resnet.fc.in_features
resnet.fc = nn.Sequential(
    nn.Linear(in_feats, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, 10)
)

print("ResNet50 新分类头:")
print(resnet.fc)
EfficientNet 迁移 #
python
import torch
import torch.nn as nn
import torchvision.models as models

efficientnet = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Freeze the backbone before swapping the classifier.
efficientnet.requires_grad_(False)

# classifier[1] holds the final Linear layer; replace it for 10 classes.
in_feats = efficientnet.classifier[1].in_features
efficientnet.classifier[1] = nn.Linear(in_feats, 10)

print("EfficientNet-B0 分类器:")
print(efficientnet.classifier)
Vision Transformer 迁移 #
python
import torch
import torch.nn as nn
import torchvision.models as models

vit = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_V1)

# Freeze all pretrained parameters.
vit.requires_grad_(False)

# ViT exposes its classification layer at `heads.head`.
vit.heads.head = nn.Linear(vit.heads.head.in_features, 10)

print("ViT-B/16 分类头:")
print(vit.heads)
数据预处理 #
标准预处理 #
python
from torchvision import transforms

# ImageNet channel statistics expected by torchvision pretrained models.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

# Deterministic inference pipeline: resize shorter side, crop the center.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD)
])
训练数据增强 #
python
from torchvision import transforms

# ImageNet normalization constants shared by both pipelines.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop / flip / rotation / color jitter augmentation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1
    ),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD)
])

# Validation pipeline: deterministic resize + center crop only.
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD)
])
完整训练示例 #
python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.optim.lr_scheduler import CosineAnnealingLR

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training-time augmentation; validation uses a deterministic resize/crop.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet stats
])
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# CIFAR-10 is downloaded to ./data on first run.
train_dataset = datasets.CIFAR10('./data', train=True, download=True, transform=train_transform)
val_dataset = datasets.CIFAR10('./data', train=False, download=True, transform=val_transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)

# Feature-extraction setup: freeze the backbone, train only the new fc head.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
# Only the head's parameters are handed to the optimizer.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
scheduler = CosineAnnealingLR(optimizer, T_max=10)
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one full optimization pass over `dataloader`.

    Returns (average per-batch loss, accuracy in percent).
    """
    model.train()  # enable training-mode behavior (dropout, batchnorm)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for batch_x, batch_y in dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()
        preds = logits.argmax(dim=1)
        n_seen += batch_y.size(0)
        n_correct += (preds == batch_y).sum().item()
    return loss_sum / len(dataloader), 100. * n_correct / n_seen
def validate(model, dataloader, criterion, device):
    """Evaluate without gradient tracking.

    Returns (average per-batch loss, accuracy in percent).
    """
    model.eval()  # inference-mode behavior (dropout off, batchnorm frozen)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            logits = model(batch_x)
            loss_sum += criterion(logits, batch_y).item()
            preds = logits.argmax(dim=1)
            n_seen += batch_y.size(0)
            n_correct += (preds == batch_y).sum().item()
    return loss_sum / len(dataloader), 100. * n_correct / n_seen
# Main loop: train, evaluate, then advance the cosine lr schedule each epoch.
for epoch in range(10):
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    scheduler.step()  # once per epoch, after the optimizer updates
    print(f"Epoch {epoch+1}/10")
    print(f" Train - Loss: {train_loss:.4f}, Acc: {train_acc:.2f}%")
    print(f" Val - Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%")
微调技巧 #
差异化学习率 #
python
import torch.optim as optim

# Discriminative learning rates: earlier (more generic) layers get smaller
# lrs; the freshly initialized classifier gets the largest.
# NOTE(review): assumes a ResNet-style `model` defined by an earlier snippet.
base_lr = 1e-4
classifier_lr = 1e-3
optimizer = optim.Adam([
    {'params': model.conv1.parameters(), 'lr': base_lr * 0.1},
    {'params': model.bn1.parameters(), 'lr': base_lr * 0.1},
    {'params': model.layer1.parameters(), 'lr': base_lr * 0.5},
    {'params': model.layer2.parameters(), 'lr': base_lr * 0.5},
    {'params': model.layer3.parameters(), 'lr': base_lr},
    {'params': model.layer4.parameters(), 'lr': base_lr},
    {'params': model.fc.parameters(), 'lr': classifier_lr}
])
Warmup + Cosine #
python
import torch
import math
from torch.optim.lr_scheduler import _LRScheduler
class WarmupCosineScheduler(_LRScheduler):
    """Linear warmup for `warmup_epochs`, then cosine decay to `min_lr`.

    During warmup the lr ramps linearly from 0 up to each param group's
    base lr; afterwards it follows half a cosine period down to `min_lr`
    at `total_epochs`.

    Raises:
        ValueError: if `total_epochs <= warmup_epochs` (the original code
            would hit a ZeroDivisionError inside get_lr() instead).
    """

    def __init__(self, optimizer, warmup_epochs, total_epochs, min_lr=0, last_epoch=-1):
        # FIX: validate the degenerate configuration up front.
        if total_epochs <= warmup_epochs:
            raise ValueError("total_epochs must be greater than warmup_epochs")
        self.warmup_epochs = warmup_epochs
        self.total_epochs = total_epochs
        self.min_lr = min_lr
        # Must come last: the base-class __init__ already calls get_lr().
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        # Warmup phase: scale each base lr by the fraction of warmup done.
        if self.last_epoch < self.warmup_epochs:
            return [base_lr * self.last_epoch / self.warmup_epochs
                    for base_lr in self.base_lrs]
        # Cosine phase: progress runs 0 -> 1 over the remaining epochs.
        progress = (self.last_epoch - self.warmup_epochs) / (self.total_epochs - self.warmup_epochs)
        return [self.min_lr + (base_lr - self.min_lr) * 0.5 * (1 + math.cos(math.pi * progress))
                for base_lr in self.base_lrs]
# Demo: 5 warmup epochs out of 100 total.
# NOTE(review): relies on `optimizer` from an earlier snippet.
scheduler = WarmupCosineScheduler(optimizer, warmup_epochs=5, total_epochs=100)
Label Smoothing #
python
import torch
import torch.nn as nn
import torch.nn.functional as F
class LabelSmoothingLoss(nn.Module):
    """Cross entropy against a smoothed target distribution.

    The true class receives probability `1 - smoothing`; the remaining
    `smoothing` mass is spread uniformly over the other `classes - 1`
    labels.
    """

    def __init__(self, classes, smoothing=0.1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.classes = classes

    def forward(self, pred, target):
        log_probs = pred.log_softmax(dim=-1)
        with torch.no_grad():
            # Build the smoothed target: uniform off-class mass, then
            # place `confidence` on each sample's true class.
            smooth_target = torch.full_like(log_probs, self.smoothing / (self.classes - 1))
            smooth_target.scatter_(1, target.unsqueeze(1), self.confidence)
        return -(smooth_target * log_probs).sum(dim=-1).mean()

criterion = LabelSmoothingLoss(classes=10, smoothing=0.1)
下一步 #
现在你已经掌握了 PyTorch 迁移学习的核心概念,接下来学习 GPU 加速,了解如何高效利用 GPU 进行训练!
最后更新:2026-03-29