模型构建 #

模型类型 #

TensorFlow/Keras 提供了三种主要的模型构建方式：

text

┌─────────────────────────────────────────────────────────────┐
│                    模型构建方式                              │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  Sequential API                                             │
│  ├── 最简单                                                 │
│  ├── 层的线性堆叠                                           │
│  └── 适用于简单模型                                         │
│                                                             │
│  Functional API                                             │
│  ├── 灵活                                                   │
│  ├── 支持多输入/多输出                                      │
│  └── 适用于复杂拓扑                                         │
│                                                             │
│  Model Subclassing                                          │
│  ├── 最灵活                                                 │
│  ├── 完全自定义                                             │
│  └── 适用于研究/特殊需求                                    │
│                                                             │
└─────────────────────────────────────────────────────────────┘

Sequential 模型 #

基本构建 #

python

import tensorflow as tf

# Short alias so the layer stack reads without the long module prefix.
layers = tf.keras.layers

# Option 1: hand the complete layer list to the constructor.
layer_stack = [
    layers.Dense(64, activation='relu', input_shape=(784,)),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
]
model = tf.keras.Sequential(layer_stack)

# Option 2: start from an empty model and append the same stack layer by layer.
model = tf.keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

# Print the layer-by-layer structure of the model.
model.summary()

动态构建 #

python

import tensorflow as tf

# A Sequential whose only layer declares no input shape.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
])

# No input shape is known yet, so asking for a summary may raise ValueError.
try:
    model.summary()
except ValueError as e:
    print(f"错误: {e}")

# Either declare the input shape explicitly to build the model...
model.build(input_shape=(None, 784))
model.summary()

# ...or let a forward pass on real data build it implicitly.
y = model(tf.random.normal([1, 784]))
print(f"输出形状: {y.shape}")

Functional API 模型 #

单输入单输出 #

python

import tensorflow as tf

layers = tf.keras.layers

# Symbolic input that the rest of the graph is wired from.
inputs = tf.keras.Input(shape=(784,), name='input')

# Two hidden Dense layers with dropout in between.
hidden = layers.Dense(64, activation='relu')(inputs)
hidden = layers.Dropout(0.2)(hidden)
hidden = layers.Dense(32, activation='relu')(hidden)

# Softmax head over 10 classes.
outputs = layers.Dense(10, activation='softmax', name='output')(hidden)

# Tie the input and output tensors together into a named model.
model = tf.keras.Model(inputs=inputs, outputs=outputs, name='mnist_classifier')
model.summary()

多输入模型 #

python

import tensorflow as tf

layers = tf.keras.layers

# Image branch: conv -> max-pool -> conv -> global average pooling.
image_input = tf.keras.Input(shape=(224, 224, 3), name='image')
image_features = layers.Conv2D(32, 3, activation='relu')(image_input)
image_features = layers.MaxPooling2D()(image_features)
image_features = layers.Conv2D(64, 3, activation='relu')(image_features)
image_features = layers.GlobalAveragePooling2D()(image_features)

# Metadata branch: a single Dense layer over the 10 metadata values.
meta_input = tf.keras.Input(shape=(10,), name='metadata')
meta_features = layers.Dense(32, activation='relu')(meta_input)

# Merge both branches and finish with a sigmoid output unit.
merged = layers.concatenate([image_features, meta_features])
hidden = layers.Dense(64, activation='relu')(merged)
outputs = layers.Dense(1, activation='sigmoid')(hidden)

model = tf.keras.Model(inputs=[image_input, meta_input], outputs=outputs)

model.summary()

多输出模型 #

python

import tensorflow as tf

layers = tf.keras.layers

# Shared trunk: embedding followed by an LSTM that returns every time step.
inputs = tf.keras.Input(shape=(100,), name='text')
embedded = layers.Embedding(10000, 64)(inputs)
sequence_states = layers.LSTM(64, return_sequences=True)(embedded)

# Classification head: reads only the last time step of the LSTM output.
classification_head = layers.Dense(10, activation='softmax', name='classification')
class_output = classification_head(sequence_states[:, -1, :])

# Sequence-labelling head: applied to every time step.
sequence_head = layers.Dense(5, activation='softmax', name='sequence')
seq_output = sequence_head(sequence_states)

model = tf.keras.Model(inputs=inputs, outputs=[class_output, seq_output])

# Losses and their weights are keyed by the output layer names given above.
per_output_loss = {
    'classification': 'sparse_categorical_crossentropy',
    'sequence': 'sparse_categorical_crossentropy',
}
per_output_weight = {
    'classification': 1.0,
    'sequence': 0.5,
}
model.compile(
    optimizer='adam',
    loss=per_output_loss,
    loss_weights=per_output_weight,
)

Model Subclassing #

基本子类化 #

python

import tensorflow as tf

class MyModel(tf.keras.Model):
    """Three-layer dense classifier with dropout after the first layer.

    Args:
        num_classes: Number of units in the final softmax layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        layers = tf.keras.layers
        self.dense1 = layers.Dense(64, activation='relu')
        self.dropout = layers.Dropout(0.2)
        self.dense2 = layers.Dense(32, activation='relu')
        self.dense3 = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: Batch fed to the first Dense layer.
            training: Forwarded to the dropout layer.

        Returns:
            Output of the final softmax Dense layer.
        """
        hidden = self.dropout(self.dense1(inputs), training=training)
        return self.dense3(self.dense2(hidden))

model = MyModel(num_classes=10)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# A subclassed model has no static graph; run one batch through it so the
# weights get created before asking for the summary.
x = tf.random.normal([1, 784])
y = model(x)
model.summary()

带自定义逻辑 #

python

import tensorflow as tf

class CustomModel(tf.keras.Model):
    """Configurable dense classifier with a hand-written training step.

    Args:
        hidden_units: Iterable of layer widths; one ReLU Dense layer is
            created per entry, in order.
        num_classes: Number of units in the final softmax layer.
    """

    def __init__(self, hidden_units, num_classes):
        super().__init__()
        self.hidden_layers = [
            tf.keras.layers.Dense(units, activation='relu')
            for units in hidden_units
        ]
        self.output_layer = tf.keras.layers.Dense(num_classes, activation='softmax')
        # One Dropout instance is shared by all hidden layers.
        self.dropout = tf.keras.layers.Dropout(0.2)

    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: Batch fed to the first hidden layer.
            training: Forwarded to the dropout layer after every hidden layer.

        Returns:
            Output of the softmax output layer.
        """
        x = inputs
        for layer in self.hidden_layers:
            x = layer(x)
            # Pass the flag to the layer instead of branching in Python:
            # Dropout is a no-op when training is False, and this also works
            # when `training` is not a plain Python bool.
            x = self.dropout(x, training=training)
        return self.output_layer(x)

    def train_step(self, data):
        """Run one optimisation step on a single batch.

        Args:
            data: An ``(x, y)`` pair as delivered by ``fit``.

        Returns:
            Dict mapping each metric name to its current result.
        """
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            # Include layer regularization penalties (if any) in the objective.
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

# Two hidden layers (128 and 64 units) feeding a 10-class output.
model = CustomModel(num_classes=10, hidden_units=[128, 64])

# compile() provides the optimizer, loss and metrics used by train_step.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

模型编译 #

基本编译 #

python

import tensorflow as tf

layers = tf.keras.layers

# Minimal two-layer classifier over 784 input features.
model = tf.keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(layers.Dense(10, activation='softmax'))

# String shortcuts pick the optimizer, loss and metric with default settings.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

详细配置 #

python

import tensorflow as tf

# Assumes `model` already exists (e.g. the one built in the basic compile
# example); this call only shows the configurable compile() arguments.
model.compile(
    # Optimizer object instead of the 'adam' string, with hyperparameters
    # spelled out.
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07
    ),
    # Labels are integer class ids and the model already outputs
    # probabilities, hence from_logits=False.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=False,
        reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE
    ),
    # Only metrics that compare (y_true, y_pred) belong here. The running
    # loss is reported by Keras on its own, so no Mean(name='loss') metric
    # is listed: it would receive y_pred as sample weights and clash with
    # the built-in 'loss' entry.
    metrics=[
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top5_acc')
    ],
    loss_weights=None,
    weighted_metrics=None,
    run_eagerly=False,
    steps_per_execution=1,
    jit_compile='auto'
)

模型保存与加载 #

保存整个模型 #

python

import tensorflow as tf

# Small compiled classifier used to demonstrate the whole-model save formats.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Save in the .keras format (recommended)
model.save('model.keras')

# Save in the HDF5 format
model.save('model.h5')

# Save in the SavedModel format
# NOTE(review): Keras 3 (TF >= 2.16) reportedly rejects paths without a
# .keras/.h5 extension in Model.save and offers Model.export() for
# SavedModel instead — confirm which Keras version this tutorial targets.
model.save('saved_model')

# Load each artifact back; every call rebinds `loaded_model`.
loaded_model = tf.keras.models.load_model('model.keras')
loaded_model = tf.keras.models.load_model('model.h5')
loaded_model = tf.keras.models.load_model('saved_model')

仅保存权重 #

python

import tensorflow as tf

# Small classifier whose weights (not architecture) are saved and restored.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Save weights: once to an .h5 file, once to an extension-less path.
# NOTE(review): Keras 3 (TF >= 2.16) reportedly requires weight files to end
# in `.weights.h5` and rejects extension-less paths — confirm which Keras
# version this tutorial targets.
model.save_weights('weights.h5')
model.save_weights('weights')

# Load weights back into a model with the same architecture.
model.load_weights('weights.h5')
model.load_weights('weights')

Checkpoint #

python

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# fit() refuses to run on an uncompiled model, so compile first.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Checkpoint callback: the epoch number and validation loss are formatted
# into the file name, and a file is written only when val_loss improves.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/model_{epoch:02d}_{val_loss:.4f}.keras',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    mode='min',
    save_freq='epoch'
)

# Train with checkpointing enabled.
# x_train / y_train / x_val / y_val are placeholders for your own data;
# validation data is required because the callback monitors val_loss.
model.fit(
    x_train, y_train,
    epochs=10,
    validation_data=(x_val, y_val),
    callbacks=[checkpoint_callback]
)

模型可视化 #

模型结构图 #

python

import tensorflow as tf

layers = tf.keras.layers

# Four-layer classifier to be rendered as a diagram.
model = tf.keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

# Rendering options for the architecture diagram written to model.png.
plot_options = {
    'to_file': 'model.png',
    'show_shapes': True,
    'show_layer_names': True,
    'rankdir': 'TB',
    'expand_nested': True,
    'dpi': 96,
}
tf.keras.utils.plot_model(model, **plot_options)

模型摘要 #

python

import tensorflow as tf

layers = tf.keras.layers

model = tf.keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(layers.Dense(10, activation='softmax'))

# Print the summary straight to stdout.
model.summary()

# Capture the summary text in a list instead of printing it directly.
summary_str = []
model.summary(print_fn=summary_str.append)
print('\n'.join(summary_str))

# Architecture description as returned by get_config().
config = model.get_config()
print(f"模型配置: {config}")

# Current weight values as returned by get_weights().
weights = model.get_weights()
print(f"权重数量: {len(weights)}")

模型调试 #

检查模型 #

python

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Model-level input and output shapes.
print(f"输入形状: {model.input_shape}")
print(f"输出形状: {model.output_shape}")

# Per-layer overview.
print(f"层数: {len(model.layers)}")
for layer in model.layers:
    # Read the shape from the layer's symbolic output tensor; the
    # `layer.output_shape` attribute is not available on Keras 3 layers.
    print(f"  {layer.name}: {layer.output.shape}")

# Parameter counts, split by trainability. Generator expressions avoid
# building throwaway lists just to sum them.
trainable_count = sum(tf.keras.backend.count_params(w) for w in model.trainable_weights)
non_trainable_count = sum(tf.keras.backend.count_params(w) for w in model.non_trainable_weights)
print(f"可训练参数: {trainable_count:,}")
print(f"不可训练参数: {non_trainable_count:,}")

调试前向传播 #

python

import tensorflow as tf

layers = tf.keras.layers

model = tf.keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(layers.Dense(10, activation='softmax'))

# Make tf.function-decorated code run eagerly so it can be stepped through.
tf.config.run_functions_eagerly(True)

# Push one random sample through the layers one at a time, reporting the
# shape and dtype after each layer and flagging any NaN values.
x = tf.random.normal([1, 784])
for layer in model.layers:
    x = layer(x)
    print(f"{layer.name}: shape={x.shape}, dtype={x.dtype}")
    contains_nan = tf.reduce_any(tf.math.is_nan(x))
    if contains_nan:
        print(f"  警告: {layer.name} 输出包含 NaN!")

下一步 #

现在你已经掌握了模型构建，接下来学习模型训练，了解如何高效训练深度学习模型！