模型构建 #
模型类型 #
TensorFlow/Keras 提供了三种主要的模型构建方式:
text
┌─────────────────────────────────────────────────────────────┐
│ 模型构建方式 │
├─────────────────────────────────────────────────────────────┤
│ │
│ Sequential API │
│ ├── 最简单 │
│ ├── 层的线性堆叠 │
│ └── 适用于简单模型 │
│ │
│ Functional API │
│ ├── 灵活 │
│ ├── 支持多输入/多输出 │
│ └── 适用于复杂拓扑 │
│ │
│ Model Subclassing │
│ ├── 最灵活 │
│ ├── 完全自定义 │
│ └── 适用于研究/特殊需求 │
│ │
└─────────────────────────────────────────────────────────────┘
Sequential 模型 #
基本构建 #
python
import tensorflow as tf

# Method 1: pass the full layer stack to the Sequential constructor.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Method 2: add layers one at a time.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Print the layer-by-layer architecture and parameter counts.
model.summary()
动态构建 #
python
import tensorflow as tf

# Build a Sequential model without declaring an input shape up front.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu'))

# The model has no input shape yet, so its weights are not created
# and summary() raises ValueError.
try:
    model.summary()
except ValueError as e:
    print(f"错误: {e}")

# Option 1: build the model explicitly with a batch input shape.
model.build(input_shape=(None, 784))
model.summary()

# Option 2: calling the model on real data builds it automatically.
x = tf.random.normal([1, 784])
y = model(x)
print(f"输出形状: {y.shape}")
Functional API 模型 #
单输入单输出 #
python
import tensorflow as tf

# Functional API: declare an input tensor, then chain layer calls to
# describe the computation graph.
input_tensor = tf.keras.Input(shape=(784,), name='input')
hidden = tf.keras.layers.Dense(64, activation='relu')(input_tensor)
hidden = tf.keras.layers.Dropout(0.2)(hidden)
hidden = tf.keras.layers.Dense(32, activation='relu')(hidden)
probabilities = tf.keras.layers.Dense(10, activation='softmax', name='output')(hidden)

# Wrap the graph between input and output into a Model.
model = tf.keras.Model(inputs=input_tensor, outputs=probabilities, name='mnist_classifier')
model.summary()
多输入模型 #
python
import tensorflow as tf

# Image branch: small convolutional feature extractor.
image_input = tf.keras.Input(shape=(224, 224, 3), name='image')
x1 = tf.keras.layers.Conv2D(32, 3, activation='relu')(image_input)
x1 = tf.keras.layers.MaxPooling2D()(x1)
x1 = tf.keras.layers.Conv2D(64, 3, activation='relu')(x1)
x1 = tf.keras.layers.GlobalAveragePooling2D()(x1)

# Metadata branch: dense encoder for a 10-dim feature vector.
meta_input = tf.keras.Input(shape=(10,), name='metadata')
x2 = tf.keras.layers.Dense(32, activation='relu')(meta_input)

# Merge both branches, then apply a binary-classification head.
concatenated = tf.keras.layers.concatenate([x1, x2])
x = tf.keras.layers.Dense(64, activation='relu')(concatenated)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(
    inputs=[image_input, meta_input],
    outputs=outputs
)
model.summary()
多输出模型 #
python
import tensorflow as tf

inputs = tf.keras.Input(shape=(100,), name='text')
x = tf.keras.layers.Embedding(10000, 64)(inputs)
x = tf.keras.layers.LSTM(64, return_sequences=True)(x)

# Classification head: uses only the last timestep of the LSTM output.
class_output = tf.keras.layers.Dense(10, activation='softmax', name='classification')(x[:, -1, :])

# Sequence-labeling head: one prediction per timestep.
seq_output = tf.keras.layers.Dense(5, activation='softmax', name='sequence')(x)

model = tf.keras.Model(
    inputs=inputs,
    outputs=[class_output, seq_output]
)

# Per-output losses and loss weights are keyed by output layer names.
model.compile(
    optimizer='adam',
    loss={
        'classification': 'sparse_categorical_crossentropy',
        'sequence': 'sparse_categorical_crossentropy'
    },
    loss_weights={
        'classification': 1.0,
        'sequence': 0.5
    }
)
Model Subclassing #
基本子类化 #
python
import tensorflow as tf

class MyModel(tf.keras.Model):
    """Simple MLP classifier built via Model subclassing.

    Layers are created in __init__; the forward pass is defined in call().
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(64, activation='relu')
        self.dropout = tf.keras.layers.Dropout(0.2)
        self.dense2 = tf.keras.layers.Dense(32, activation='relu')
        self.dense3 = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        x = self.dense1(inputs)
        # Pass the training flag explicitly so dropout is active only
        # during training and is a no-op at inference time.
        x = self.dropout(x, training=training)
        x = self.dense2(x)
        return self.dense3(x)

model = MyModel(num_classes=10)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Subclassed models have no static graph until called; run one forward
# pass so that summary() can report weights and shapes.
x = tf.random.normal([1, 784])
y = model(x)
model.summary()
带自定义逻辑 #
python
import tensorflow as tf

class CustomModel(tf.keras.Model):
    """Configurable MLP with a hand-written training step.

    hidden_units: iterable of layer widths for the hidden Dense layers.
    num_classes: size of the softmax output.
    """

    def __init__(self, hidden_units, num_classes):
        super().__init__()
        self.hidden_layers = [
            tf.keras.layers.Dense(units, activation='relu')
            for units in hidden_units
        ]
        self.output_layer = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.dropout = tf.keras.layers.Dropout(0.2)

    def call(self, inputs, training=False):
        x = inputs
        for layer in self.hidden_layers:
            x = layer(x)
            # Apply dropout after each hidden layer, but only in training.
            if training:
                x = self.dropout(x)
        return self.output_layer(x)

    def train_step(self, data):
        # fit() delivers each batch as an (x, y) tuple.
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            # NOTE(review): compiled_loss/compiled_metrics are the TF 2.x
            # API; Keras 3 replaces them with compute_loss() — verify the
            # target Keras version.
            loss = self.compiled_loss(y, y_pred)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current values for logging.
        return {m.name: m.result() for m in self.metrics}

model = CustomModel(hidden_units=[128, 64], num_classes=10)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
模型编译 #
基本编译 #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# String shortcuts resolve to each object's default configuration
# (e.g. 'adam' -> tf.keras.optimizers.Adam with default hyperparameters).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
详细配置 #
python
import tensorflow as tf

# Fully spelled-out compile() configuration; `model` is assumed to be
# defined by the preceding example.
model.compile(
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07
    ),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=False,  # predictions are already softmax probabilities
        reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE
    ),
    metrics=[
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top5_acc')
        # Do NOT add tf.keras.metrics.Mean here: compile() metrics are
        # called with (y_true, y_pred), which Mean cannot interpret.
        # The training loss is tracked automatically under the name 'loss'.
    ],
    loss_weights=None,
    weighted_metrics=None,
    run_eagerly=False,       # True forces eager execution (for debugging)
    steps_per_execution=1,
    jit_compile='auto'       # enable XLA compilation when available
)
模型保存与加载 #
保存整个模型 #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Save in the native .keras format (recommended).
model.save('model.keras')
# Save in the legacy HDF5 format.
model.save('model.h5')
# Save in the TensorFlow SavedModel format (a directory).
# NOTE(review): an extension-less path saves as SavedModel in TF 2.x;
# Keras 3 requires model.export() or an explicit extension — verify version.
model.save('saved_model')

# Loading restores architecture, weights, and compile state.
loaded_model = tf.keras.models.load_model('model.keras')
loaded_model = tf.keras.models.load_model('model.h5')
loaded_model = tf.keras.models.load_model('saved_model')
仅保存权重 #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Save weights only — no architecture or optimizer state.
model.save_weights('weights.h5')   # single HDF5 file
# An extension-less path writes a TF checkpoint (multiple files).
# NOTE(review): Keras 3 requires a '.weights.h5' suffix — verify version.
model.save_weights('weights')

# Load weights back into a model with a matching architecture.
model.load_weights('weights.h5')
model.load_weights('weights')
Checkpoint #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])
# The model must be compiled before fit() — omitting this raises RuntimeError.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Callback that saves the model during training; the filepath template is
# filled with the epoch number and the monitored metric's value.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/model_{epoch:02d}_{val_loss:.4f}.keras',
    monitor='val_loss',
    save_best_only=True,      # keep only checkpoints that improve val_loss
    save_weights_only=False,  # save the full model, not just weights
    mode='min',               # lower val_loss is better
    save_freq='epoch'
)

# x_train / y_train / x_val / y_val are assumed to be defined elsewhere.
model.fit(
    x_train, y_train,
    epochs=10,
    validation_data=(x_val, y_val),
    callbacks=[checkpoint_callback]
)
模型可视化 #
模型结构图 #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Render the architecture to an image file.
# NOTE: requires the pydot package and the Graphviz binaries.
tf.keras.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=True,       # annotate edges with tensor shapes
    show_layer_names=True,
    rankdir='TB',           # top-to-bottom layout ('LR' for left-to-right)
    expand_nested=True,
    dpi=96
)
模型摘要 #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Print the summary to stdout.
model.summary()

# Capture the summary text via the print_fn hook instead of printing.
summary_str = []
model.summary(print_fn=lambda x: summary_str.append(x))
print('\n'.join(summary_str))

# Architecture as a serializable config dict.
config = model.get_config()
print(f"模型配置: {config}")

# All weight arrays (kernels and biases) as NumPy arrays.
weights = model.get_weights()
print(f"权重数量: {len(weights)}")
模型调试 #
检查模型 #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Input/output shapes of the built model.
print(f"输入形状: {model.input_shape}")
print(f"输出形状: {model.output_shape}")

# Enumerate layers with their output shapes.
print(f"层数: {len(model.layers)}")
for layer in model.layers:
    print(f" {layer.name}: {layer.output_shape}")

# Count trainable vs. non-trainable parameters.
trainable_count = sum([tf.keras.backend.count_params(w) for w in model.trainable_weights])
non_trainable_count = sum([tf.keras.backend.count_params(w) for w in model.non_trainable_weights])
print(f"可训练参数: {trainable_count:,}")
print(f"不可训练参数: {non_trainable_count:,}")
调试前向传播 #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Run tf.function-decorated code eagerly so tensors can be inspected.
tf.config.run_functions_eagerly(True)

# Push a random batch through the model one layer at a time, checking
# shapes, dtypes, and NaN contamination after every layer.
x = tf.random.normal([1, 784])
for layer in model.layers:
    x = layer(x)
    print(f"{layer.name}: shape={x.shape}, dtype={x.dtype}")
    if tf.reduce_any(tf.math.is_nan(x)):
        print(f" 警告: {layer.name} 输出包含 NaN!")

# Remember to restore graph execution afterwards:
# tf.config.run_functions_eagerly(False)
下一步 #
现在你已经掌握了模型构建,接下来学习 模型训练,了解如何高效训练深度学习模型!
最后更新:2026-04-04