CNN 图像分类实战 #
项目概述 #
本节将使用 TensorFlow 构建一个完整的图像分类项目,从数据准备到模型部署。
项目流程 #
text
┌─────────────────────────────────────────────────────────────┐
│ 图像分类流程 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 数据准备 ──► 数据增强 ──► 模型构建 ──► 训练 │
│ │ │ │
│ ▼ ▼ │
│ 数据加载 评估调优 ──► 部署 │
│ │
└─────────────────────────────────────────────────────────────┘
MNIST 手写数字识别 #
数据准备 #
python
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


def _prepare(images):
    """Scale uint8 pixels to float32 in [0, 1] and append a channel axis."""
    return (images.astype('float32') / 255.0)[..., tf.newaxis]


# Load the MNIST handwritten-digit dataset (downloaded on first use).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize and reshape to (N, 28, 28, 1) for the Conv2D layers.
x_train = _prepare(x_train)
x_test = _prepare(x_test)

print(f"训练集形状: {x_train.shape}")
print(f"测试集形状: {x_test.shape}")

# Show the first ten training digits in a 2x5 grid and save to disk.
fig, axes = plt.subplots(2, 5, figsize=(10, 4))
for idx, panel in enumerate(axes.flat):
    panel.imshow(x_train[idx, :, :, 0], cmap='gray')
    panel.set_title(f"Label: {y_train[idx]}")
    panel.axis('off')
plt.tight_layout()
plt.savefig('mnist_samples.png')
构建模型 #
python
import tensorflow as tf

layers = tf.keras.layers

# Small LeNet-style CNN for 28x28 single-channel digit images:
# three convolutional stages for feature extraction, followed by a
# dense classifier head with dropout for regularization.
model = tf.keras.Sequential([
    layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    # Softmax over the ten digit classes.
    layers.Dense(10, activation='softmax'),
])

# sparse_categorical_crossentropy: labels are integer class ids, not one-hot.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()
训练模型 #
python
import tensorflow as tf

# Stop training once validation loss stalls, and keep the best weights;
# also checkpoint the best model (by validation accuracy) to disk.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
)
callbacks = [early_stop, checkpoint]

# Hold out 20% of the training data as a validation split.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=20,
    validation_split=0.2,
    callbacks=callbacks,
)

# Final evaluation on the untouched test set.
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"测试准确率: {test_acc:.4f}")
CIFAR-10 图像分类 #
数据准备 #
python
import tensorflow as tf

# CIFAR-10: 60k 32x32 RGB images across ten object categories.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Normalize pixel intensities into [0, 1] as float32.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Human-readable names indexed by integer class label.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

print(f"训练集形状: {x_train.shape}")
print(f"类别数: {len(class_names)}")
数据增强 #
python
import tensorflow as tf

# Random augmentation pipeline. These Keras preprocessing layers are
# stochastic only in training mode and act as identity in inference mode.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1),
])

# Shuffle and batch first so the augmentation runs on whole batches.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(50000).batch(64)


def augment_data(image, label):
    """Apply random augmentation to a batch of images; labels pass through.

    BUG FIX: training=True is required here. Inside Dataset.map the layers
    are not being called by fit(), so without it they run in inference
    mode and apply no augmentation at all.
    """
    return data_augmentation(image, training=True), label


train_dataset = train_dataset.map(augment_data, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# Test data is only batched and prefetched -- never augmented.
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(64).prefetch(tf.data.AUTOTUNE)
构建深度 CNN #
python
import tensorflow as tf


def conv_block(x, filters, kernel_size=3, dropout_rate=0.2):
    """Two Conv-BN-ReLU stages, then 2x2 max-pooling and dropout.

    Args:
        x: input feature-map tensor.
        filters: number of convolution filters in both conv layers.
        kernel_size: spatial size of the convolution kernels.
        dropout_rate: fraction of activations dropped after pooling.

    Returns:
        The downsampled feature-map tensor.
    """
    for _ in range(2):
        x = tf.keras.layers.Conv2D(filters, kernel_size, padding='same')(x)
        # BN before the nonlinearity stabilizes training of deeper stacks.
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.MaxPooling2D()(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)
    return x


inputs = tf.keras.Input(shape=(32, 32, 3))
# BUG FIX: the data-preparation step already scales pixels to [0, 1], so
# the former Rescaling(1./255) layer here divided by 255 a second time,
# squashing inputs into [0, 1/255]. Feed the inputs directly instead.
x = conv_block(inputs, 32, dropout_rate=0.2)
x = conv_block(x, 64, dropout_rate=0.3)
x = conv_block(x, 128, dropout_rate=0.4)

# Global average pooling keeps the classifier head small and
# less prone to overfitting than a large Flatten+Dense stack.
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()
训练与评估 #
python
import tensorflow as tf

# Training schedule: stop early on stalled validation loss, halve the
# learning rate on plateaus, and keep the best checkpoint on disk.
# NOTE(review): the test split doubles as the validation set here, so the
# "val" metrics are not an unbiased estimate of generalization; a real
# project should carve a validation split out of the training data.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                     patience=10,
                                     restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                         factor=0.5,
                                         patience=5,
                                         min_lr=1e-6),
    tf.keras.callbacks.ModelCheckpoint('cifar10_best.keras',
                                       monitor='val_accuracy',
                                       save_best_only=True),
]

history = model.fit(train_dataset,
                    epochs=100,
                    validation_data=test_dataset,
                    callbacks=callbacks)

# Report final test-set accuracy.
test_loss, test_acc = model.evaluate(test_dataset)
print(f"测试准确率: {test_acc:.4f}")
迁移学习 #
使用预训练模型 #
python
import tensorflow as tf

IMG_SIZE = 224
BATCH_SIZE = 32

# ResNet50V2 pretrained on ImageNet, without its classification head.
base_model = tf.keras.applications.ResNet50V2(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)
# Freeze the backbone for the feature-extraction phase.
base_model.trainable = False

model = tf.keras.Sequential([
    # Upsample 32x32 CIFAR images to the backbone's expected resolution.
    tf.keras.layers.Resizing(IMG_SIZE, IMG_SIZE),
    # BUG FIX: the pipeline already scales pixels to [0, 1], and ResNet50V2
    # expects inputs in [-1, 1] (its preprocess_input maps [0, 255] via
    # x/127.5 - 1). The former Rescaling(1./255) divided by 255 again;
    # instead map [0, 1] -> [-1, 1].
    tf.keras.layers.Rescaling(2.0, offset=-1.0),
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train only the new head; the frozen backbone acts as a feature extractor.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset
)
微调模型 #
python
import tensorflow as tf

# Fine-tuning phase: unfreeze the backbone, then re-freeze everything
# except its last 20 layers.
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

# Keep BatchNormalization layers frozen even in the unfrozen tail:
# letting their statistics and scale/offset update on a small dataset
# at this stage typically degrades the pretrained features (see the
# TensorFlow transfer-learning guide).
for layer in base_model.layers[-20:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# Recompile with a much smaller learning rate so the unfrozen layers
# adapt gently instead of destroying the pretrained weights.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

history_fine = model.fit(
    train_dataset,
    epochs=20,
    validation_data=test_dataset
)
模型可视化 #
训练曲线 #
python
import matplotlib.pyplot as plt


def plot_history(history):
    """Plot training/validation loss and accuracy curves side by side.

    Saves the resulting figure to 'training_history.png'.
    """
    panels = (
        ('loss', 'val_loss', 'Train Loss', 'Val Loss', 'Loss', 'Loss Curve'),
        ('accuracy', 'val_accuracy', 'Train Acc', 'Val Acc', 'Accuracy',
         'Accuracy Curve'),
    )
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    for ax, (train_key, val_key, train_lbl, val_lbl, ylab, title) in zip(axes, panels):
        ax.plot(history.history[train_key], label=train_lbl)
        ax.plot(history.history[val_key], label=val_lbl)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylab)
        ax.legend()
        ax.set_title(title)
    plt.tight_layout()
    plt.savefig('training_history.png')


plot_history(history)
混淆矩阵 #
python
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Predict class probabilities on the test set; argmax gives the label.
probabilities = model.predict(x_test)
y_pred_classes = np.argmax(probabilities, axis=1)
y_true = y_test.flatten()

# Render the confusion matrix as an annotated heatmap and save it.
cm = confusion_matrix(y_true, y_pred_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.savefig('confusion_matrix.png')
模型保存与预测 #
python
import tensorflow as tf
import numpy as np  # BUG FIX: np.argmax below requires numpy in scope

# Save the trained model in the native Keras format, then reload it to
# verify the round trip.
model.save('cifar10_model.keras')
loaded_model = tf.keras.models.load_model('cifar10_model.keras')

# Predict on the first ten test images and compare with ground truth.
predictions = loaded_model.predict(x_test[:10])
predicted_classes = np.argmax(predictions, axis=1)
for i in range(10):
    print(f"真实: {class_names[y_test[i][0]]}, 预测: {class_names[predicted_classes[i]]}")
下一步 #
现在你已经完成了 CNN 图像分类实战,接下来学习 RNN 序列模型,了解如何处理序列数据!
最后更新:2026-04-04