Keras API #

什么是 Keras? #

Keras 是 TensorFlow 的高级神经网络 API,提供了简洁、模块化的接口来构建和训练深度学习模型。它支持快速原型设计,同时具有足够的灵活性来构建复杂的模型架构。

Keras 的优势 #

text
┌─────────────────────────────────────────────────────────────┐
│                    Keras 特点                                │
├─────────────────────────────────────────────────────────────┤
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐         │
│  │  简洁易用    │  │  模块化     │  │  可扩展     │         │
│  └─────────────┘  └─────────────┘  └─────────────┘         │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐         │
│  │  快速原型    │  │  生产就绪   │  │  多后端     │         │
│  └─────────────┘  └─────────────┘  └─────────────┘         │
└─────────────────────────────────────────────────────────────┘

Sequential API #

Sequential API 是最简单的模型构建方式,适用于层的线性堆叠。

基本用法 #

python
import tensorflow as tf

layers = tf.keras.layers

# Option 1: hand the complete layer list to the constructor
model = tf.keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(784,)),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

# Option 2: start from an empty model and append layers one by one
model = tf.keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

# Inspect the resulting architecture
model.summary()

输入形状 #

python
import tensorflow as tf

layers = tf.keras.layers

# Declare the input shape on the first layer
model = tf.keras.Sequential([
    layers.Dense(64, input_shape=(784,)),
    layers.Dense(10),
])

# Equivalent: begin with an explicit Input layer
model = tf.keras.Sequential([
    layers.Input(shape=(784,)),
    layers.Dense(64),
    layers.Dense(10),
])

# Shapes of downstream layers are inferred automatically
model = tf.keras.Sequential([
    layers.Dense(64, input_shape=(784,)),
    layers.Dense(10),
])
print(f"输入形状: {model.input_shape}")
print(f"输出形状: {model.output_shape}")

图像模型示例 #

python
import tensorflow as tf

layers = tf.keras.layers

# Small convolutional classifier for 28x28 single-channel images:
# three conv stages, then flatten into a dense classification head.
model = tf.keras.Sequential([
    layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

model.summary()

Functional API #

Functional API 提供更灵活的模型构建方式,支持多输入、多输出和复杂的网络拓扑。

基本用法 #

python
import tensorflow as tf

# Symbolic input for 784-dimensional feature vectors
inputs = tf.keras.Input(shape=(784,))

# Wire layers together by calling each one on the previous tensor
hidden = tf.keras.layers.Dense(64, activation='relu')(inputs)
hidden = tf.keras.layers.Dropout(0.2)(hidden)
hidden = tf.keras.layers.Dense(32, activation='relu')(hidden)
outputs = tf.keras.layers.Dense(10, activation='softmax')(hidden)

# Build the model from its input and output tensors
model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.summary()

多输入模型 #

python
import tensorflow as tf

layers = tf.keras.layers

# Image branch: a conv layer followed by global average pooling
image_input = tf.keras.Input(shape=(224, 224, 3), name='image')
image_features = layers.Conv2D(32, 3, activation='relu')(image_input)
image_features = layers.GlobalAveragePooling2D()(image_features)

# Text branch: embedding followed by an LSTM encoder
text_input = tf.keras.Input(shape=(100,), name='text')
text_features = layers.Embedding(10000, 64)(text_input)
text_features = layers.LSTM(32)(text_features)

# Fuse both branches, then classify
merged = layers.concatenate([image_features, text_features])
dense = layers.Dense(64, activation='relu')(merged)
output = layers.Dense(10, activation='softmax')(dense)

model = tf.keras.Model(
    inputs=[image_input, text_input],
    outputs=output
)

model.summary()

多输出模型 #

python
import tensorflow as tf

layers = tf.keras.layers

# Shared text encoder producing one vector per timestep
inputs = tf.keras.Input(shape=(100,), name='text')
encoded = layers.Embedding(10000, 64)(inputs)
encoded = layers.LSTM(64, return_sequences=True)(encoded)

# Head 1: whole-sequence classification from the final timestep
class_output = layers.Dense(10, activation='softmax', name='classification')(encoded[:, -1, :])

# Head 2: per-timestep sequence labeling over all timesteps
seq_output = layers.Dense(5, activation='softmax', name='sequence')(encoded)

model = tf.keras.Model(
    inputs=inputs,
    outputs=[class_output, seq_output]
)

model.summary()

共享层 #

python
import tensorflow as tf

layers = tf.keras.layers

# One embedding layer instance — its weights are shared wherever it is called
shared_embedding = layers.Embedding(10000, 64)

input_a = tf.keras.Input(shape=(100,), name='input_a')
input_b = tf.keras.Input(shape=(100,), name='input_b')

# Calling the same layer object on both inputs reuses the same weights
embedded_a = shared_embedding(input_a)
embedded_b = shared_embedding(input_b)

# Encode each sequence independently (these LSTMs are NOT shared)
encoded_a = layers.LSTM(32)(embedded_a)
encoded_b = layers.LSTM(32)(embedded_b)

# Combine both encodings for a single binary decision
merged = layers.concatenate([encoded_a, encoded_b])
output = layers.Dense(1, activation='sigmoid')(merged)

model = tf.keras.Model(
    inputs=[input_a, input_b],
    outputs=output
)

model.summary()

残差连接 #

python
import tensorflow as tf

def residual_block(x, filters):
    """Basic two-conv residual unit with an optional 1x1 projection shortcut."""
    layers = tf.keras.layers
    shortcut = x

    # Main path: conv -> BN -> ReLU -> conv -> BN
    x = layers.Conv2D(filters, 3, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(filters, 3, padding='same')(x)
    x = layers.BatchNormalization()(x)

    # Project the shortcut with a 1x1 conv when channel counts differ
    if shortcut.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, 1)(shortcut)

    # Element-wise sum of both branches, then the final activation
    x = layers.Add()([x, shortcut])
    return layers.ReLU()(x)

# Stem: strided conv + max pooling to downsample the input image
inputs = tf.keras.Input(shape=(224, 224, 3))
x = tf.keras.layers.Conv2D(64, 7, strides=2, padding='same')(inputs)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPooling2D(3, strides=2, padding='same')(x)

# Stack of residual units (the third widens the channels to 128)
x = residual_block(x, 64)
x = residual_block(x, 64)
x = residual_block(x, 128)

# Classification head
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(1000, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)
model.summary()

模型编译 #

编译配置 #

python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Simplest form: refer to optimizer, loss and metrics by string name
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Fully explicit form: pass configured objects instead of strings,
# which allows tuning hyperparameters such as the learning rate
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=[
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top5_acc'),
    ],
)

常用优化器 #

python
import tensorflow as tf

opt = tf.keras.optimizers

# Commonly used optimizers, keyed by name
optimizers = {
    'sgd': opt.SGD(learning_rate=0.01),
    'sgd_momentum': opt.SGD(learning_rate=0.01, momentum=0.9),
    'adam': opt.Adam(learning_rate=0.001),
    'adamw': opt.AdamW(learning_rate=0.001, weight_decay=0.01),
    'rmsprop': opt.RMSprop(learning_rate=0.001),
    'adagrad': opt.Adagrad(learning_rate=0.01),
    'adadelta': opt.Adadelta(learning_rate=1.0),
    'nadam': opt.Nadam(learning_rate=0.001),
    'ftrl': opt.Ftrl(learning_rate=0.01),
}

model.compile(optimizer=optimizers['adam'], loss='mse')

常用损失函数 #

python
import tensorflow as tf

kl = tf.keras.losses

# Commonly used loss functions, keyed by name
losses = {
    'mse': kl.MeanSquaredError(),
    'mae': kl.MeanAbsoluteError(),
    'binary_crossentropy': kl.BinaryCrossentropy(),
    'categorical_crossentropy': kl.CategoricalCrossentropy(),
    'sparse_categorical_crossentropy': kl.SparseCategoricalCrossentropy(),
    'hinge': kl.Hinge(),
    'huber': kl.Huber(),
}

model.compile(optimizer='adam', loss=losses['mse'])

模型训练 #

fit 方法 #

python
import tensorflow as tf
import numpy as np

layers = tf.keras.layers

# Build a small classifier
model = tf.keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(784,)),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax'),
])

# Configure optimizer, loss and metrics
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Synthesize random training and validation data for the demo
x_train = np.random.random((1000, 784))
y_train = np.random.randint(10, size=(1000,))
x_val = np.random.random((200, 784))
y_val = np.random.randint(10, size=(200,))

# Run the training loop; fit() returns a History object
history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_val, y_val)
)

使用 tf.data #

python
import tensorflow as tf

# Wrap the arrays in tf.data pipelines: shuffle + batch for training
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(1000)
    .batch(32)
)

# Validation data only needs batching
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(32)

# fit() accepts datasets directly — no batch_size argument needed
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=val_dataset
)

回调函数 #

python
import tensorflow as tf

callbacks = [
    # Stop training when validation loss stops improving,
    # and roll back to the best weights seen so far
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    ),
    
    # Halve the learning rate on a validation-loss plateau
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-6
    ),
    
    # Keep only the best model (by validation accuracy) on disk.
    # NOTE: 'val_accuracy' exists only when the model is compiled with an
    # accuracy metric and validation data is passed to fit().
    tf.keras.callbacks.ModelCheckpoint(
        'best_model.keras',
        monitor='val_accuracy',
        save_best_only=True
    ),
    
    # Write TensorBoard logs, including weight histograms every epoch
    tf.keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=1
    ),
    
    # Append per-epoch metrics to a CSV file
    tf.keras.callbacks.CSVLogger('training.log'),
    
    # Custom callback: print validation accuracy after each epoch.
    # Use logs.get(...) so a missing 'val_accuracy' key (e.g. no validation
    # data this epoch) prints NaN instead of raising KeyError and aborting
    # training, which direct indexing logs['val_accuracy'] would do.
    tf.keras.callbacks.LambdaCallback(
        on_epoch_end=lambda epoch, logs: print(
            f"Epoch {epoch}: val_acc = {(logs or {}).get('val_accuracy', float('nan')):.4f}"
        )
    )
]

history = model.fit(
    x_train, y_train,
    epochs=50,
    validation_data=(x_val, y_val),
    callbacks=callbacks
)

模型评估与预测 #

python
import tensorflow as tf

# Evaluate on held-out data; returns the loss followed by each compiled metric
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"测试损失: {test_loss:.4f}")
print(f"测试准确率: {test_acc:.4f}")

# Run inference; output is one probability vector (softmax) per sample
predictions = model.predict(x_test)
print(f"预测形状: {predictions.shape}")
print(f"第一个预测: {predictions[0]}")

# Convert per-class probabilities to hard class labels
predicted_classes = tf.argmax(predictions, axis=1)
print(f"预测类别: {predicted_classes[:10]}")

下一步 #

现在你已经掌握了 Keras API,接下来学习 网络层,了解各种常用的神经网络层!

最后更新:2026-04-04