Keras API #
什么是 Keras? #
Keras 是 TensorFlow 的高级神经网络 API,提供了简洁、模块化的接口来构建和训练深度学习模型。它支持快速原型设计,同时具有足够的灵活性来构建复杂的模型架构。
Keras 的优势 #
text
┌─────────────────────────────────────────────────────────────┐
│ Keras 特点 │
├─────────────────────────────────────────────────────────────┤
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ 简洁易用 │ │ 模块化 │ │ 可扩展 │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ 快速原型 │ │ 生产就绪 │ │ 多后端 │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────┘
Sequential API #
Sequential API 是最简单的模型构建方式,适用于层的线性堆叠。
基本用法 #
python
import tensorflow as tf
# Option 1: pass the full layer list to the Sequential constructor.
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')
])
# Option 2: start empty and append layers one at a time with add().
# Both options build the exact same architecture.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
# Print a per-layer summary (output shapes and parameter counts).
model.summary()
输入形状 #
python
import tensorflow as tf

# Option 1: declare the input shape on the first layer.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, input_shape=(784,)),
    tf.keras.layers.Dense(10)
])

# Option 2: use an explicit Input layer.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(784,)),
    tf.keras.layers.Dense(64),
    tf.keras.layers.Dense(10)
])

# Option 3: omit the shape entirely -- it is inferred automatically the
# first time the model is built or called on data. (The original example
# passed input_shape here, which contradicted the "inferred" claim.)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64),
    tf.keras.layers.Dense(10)
])
# Build explicitly so input_shape/output_shape are defined below;
# calling model(x) on a real batch would have the same effect.
model.build(input_shape=(None, 784))
print(f"输入形状: {model.input_shape}")
print(f"输出形状: {model.output_shape}")
图像模型示例 #
python
import tensorflow as tf

# CNN image classifier: three conv stages with pooling, then a small
# dense head. Built layer-by-layer with add(); the resulting model is
# identical to the list-literal form.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(64, 3, activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(64, 3, activation='relu'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.summary()
Functional API #
Functional API 提供更灵活的模型构建方式,支持多输入、多输出和复杂的网络拓扑。
基本用法 #
python
import tensorflow as tf

# Functional API: declare a symbolic input tensor, thread it through
# the layers, then wrap the (input, output) pair in a Model.
inputs = tf.keras.Input(shape=(784,))

hidden = tf.keras.layers.Dense(64, activation='relu')(inputs)
hidden = tf.keras.layers.Dropout(0.2)(hidden)
hidden = tf.keras.layers.Dense(32, activation='relu')(hidden)
outputs = tf.keras.layers.Dense(10, activation='softmax')(hidden)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()
多输入模型 #
python
import tensorflow as tf
# Image branch: conv + global average pooling down to a feature vector.
image_input = tf.keras.Input(shape=(224, 224, 3), name='image')
x1 = tf.keras.layers.Conv2D(32, 3, activation='relu')(image_input)
x1 = tf.keras.layers.GlobalAveragePooling2D()(x1)
# Text branch: token IDs -> embeddings -> LSTM summary vector.
text_input = tf.keras.Input(shape=(100,), name='text')
x2 = tf.keras.layers.Embedding(10000, 64)(text_input)
x2 = tf.keras.layers.LSTM(32)(x2)
# Concatenate both feature vectors and classify jointly.
concatenated = tf.keras.layers.concatenate([x1, x2])
x = tf.keras.layers.Dense(64, activation='relu')(concatenated)
output = tf.keras.layers.Dense(10, activation='softmax')(x)
model = tf.keras.Model(
inputs=[image_input, text_input],
outputs=output
)
model.summary()
多输出模型 #
python
import tensorflow as tf
# Single text input shared by two task heads.
inputs = tf.keras.Input(shape=(100,), name='text')
x = tf.keras.layers.Embedding(10000, 64)(inputs)
# return_sequences=True keeps the per-timestep outputs so the
# sequence-tagging head below can label every position.
x = tf.keras.layers.LSTM(64, return_sequences=True)(x)
# Classification head: uses only the last timestep (x[:, -1, :]).
class_output = tf.keras.layers.Dense(10, activation='softmax', name='classification')(x[:, -1, :])
# Sequence-tagging head: a Dense applied across all timesteps.
seq_output = tf.keras.layers.Dense(5, activation='softmax', name='sequence')(x)
model = tf.keras.Model(
inputs=inputs,
outputs=[class_output, seq_output]
)
model.summary()
共享层 #
python
import tensorflow as tf

# A single Embedding layer instance: applying the same layer object to
# several inputs makes those branches share one set of weights.
shared_embedding = tf.keras.layers.Embedding(10000, 64)

input_a = tf.keras.Input(shape=(100,), name='input_a')
input_b = tf.keras.Input(shape=(100,), name='input_b')

# Each branch embeds with the shared layer, then encodes with its own LSTM.
encoded_a = tf.keras.layers.LSTM(32)(shared_embedding(input_a))
encoded_b = tf.keras.layers.LSTM(32)(shared_embedding(input_b))

# Merge the two encodings and score with a sigmoid head.
merged = tf.keras.layers.concatenate([encoded_a, encoded_b])
output = tf.keras.layers.Dense(1, activation='sigmoid')(merged)

model = tf.keras.Model(
    inputs=[input_a, input_b],
    outputs=output
)
model.summary()
残差连接 #
python
import tensorflow as tf

def residual_block(x, filters):
    """Post-activation residual block: Conv-BN-ReLU-Conv-BN plus a skip
    connection, followed by a final ReLU."""
    skip = x
    out = tf.keras.layers.Conv2D(filters, 3, padding='same')(x)
    out = tf.keras.layers.BatchNormalization()(out)
    out = tf.keras.layers.ReLU()(out)
    out = tf.keras.layers.Conv2D(filters, 3, padding='same')(out)
    out = tf.keras.layers.BatchNormalization()(out)
    # Project the shortcut with a 1x1 conv when the channel count changes;
    # otherwise the element-wise Add below would have mismatched shapes.
    if skip.shape[-1] != filters:
        skip = tf.keras.layers.Conv2D(filters, 1)(skip)
    out = tf.keras.layers.Add()([out, skip])
    return tf.keras.layers.ReLU()(out)

# ResNet-style stem (strided 7x7 conv + max-pool), then stacked blocks.
inputs = tf.keras.Input(shape=(224, 224, 3))
x = tf.keras.layers.Conv2D(64, 7, strides=2, padding='same')(inputs)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.MaxPooling2D(3, strides=2, padding='same')(x)
x = residual_block(x, 64)
x = residual_block(x, 64)
x = residual_block(x, 128)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(1000, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)
model.summary()
模型编译 #
编译配置 #
python
import tensorflow as tf
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
tf.keras.layers.Dense(10, activation='softmax')
])
# Shorthand compile: string identifiers use each component's defaults.
model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
# Explicit compile: instantiate objects to control hyperparameters
# (learning rate, metric names, top-k accuracy, ...).
model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=[
tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name='top5_acc')
]
)
常用优化器 #
python
import tensorflow as tf

# Catalogue of commonly used optimizers with typical starting
# learning rates. (AdamW additionally takes a decoupled weight decay.)
optimizers = {
    'sgd': tf.keras.optimizers.SGD(learning_rate=0.01),
    'sgd_momentum': tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
    'adam': tf.keras.optimizers.Adam(learning_rate=0.001),
    'adamw': tf.keras.optimizers.AdamW(learning_rate=0.001, weight_decay=0.01),
    'rmsprop': tf.keras.optimizers.RMSprop(learning_rate=0.001),
    'adagrad': tf.keras.optimizers.Adagrad(learning_rate=0.01),
    'adadelta': tf.keras.optimizers.Adadelta(learning_rate=1.0),
    'nadam': tf.keras.optimizers.Nadam(learning_rate=0.001),
    'ftrl': tf.keras.optimizers.Ftrl(learning_rate=0.01)
}

# Minimal model so this snippet also runs standalone (the original
# referenced an undefined `model`).
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer=optimizers['adam'], loss='mse')
常用损失函数 #
python
import tensorflow as tf

# Catalogue of common loss functions: regression (mse/mae/huber),
# classification (crossentropies, hinge).
losses = {
    'mse': tf.keras.losses.MeanSquaredError(),
    'mae': tf.keras.losses.MeanAbsoluteError(),
    'binary_crossentropy': tf.keras.losses.BinaryCrossentropy(),
    'categorical_crossentropy': tf.keras.losses.CategoricalCrossentropy(),
    'sparse_categorical_crossentropy': tf.keras.losses.SparseCategoricalCrossentropy(),
    'hinge': tf.keras.losses.Hinge(),
    'huber': tf.keras.losses.Huber()
}

# Minimal model so this snippet also runs standalone (the original
# referenced an undefined `model`).
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='adam', loss=losses['mse'])
模型训练 #
fit 方法 #
python
import tensorflow as tf
import numpy as np
# Build and compile a small classifier.
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
# Synthetic data: 1000 training / 200 validation samples,
# integer labels in [0, 10) for the sparse crossentropy loss.
x_train = np.random.random((1000, 784))
y_train = np.random.randint(10, size=(1000,))
x_val = np.random.random((200, 784))
y_val = np.random.randint(10, size=(200,))
# fit() returns a History object whose .history dict holds the
# per-epoch loss/metric curves.
history = model.fit(
x_train, y_train,
batch_size=32,
epochs=10,
validation_data=(x_val, y_val)
)
使用 tf.data #
python
import tensorflow as tf
# Wrap the numpy arrays in a tf.data pipeline.
# NOTE(review): relies on x_train/y_train/x_val/y_val and `model` from
# the previous snippet -- not standalone; confirm they are in scope.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(1000).batch(32)
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(32)
# Datasets are already batched, so fit() takes no batch_size here.
history = model.fit(
train_dataset,
epochs=10,
validation_data=val_dataset
)
回调函数 #
python
import tensorflow as tf
callbacks = [
# Early stopping: end training after `patience` epochs without
# val_loss improvement, rolling back to the best weights seen.
tf.keras.callbacks.EarlyStopping(
monitor='val_loss',
patience=5,
restore_best_weights=True
),
# Learning-rate decay: halve the LR when val_loss plateaus,
# down to a floor of 1e-6.
tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=3,
min_lr=1e-6
),
# Checkpointing: save (in the native .keras format) only when
# val_accuracy improves on the best so far.
tf.keras.callbacks.ModelCheckpoint(
'best_model.keras',
monitor='val_accuracy',
save_best_only=True
),
# TensorBoard logging, including weight histograms every epoch.
tf.keras.callbacks.TensorBoard(
log_dir='./logs',
histogram_freq=1
),
# Append per-epoch metrics to a CSV log file.
tf.keras.callbacks.CSVLogger('training.log'),
# Ad-hoc callback from a lambda: print val accuracy each epoch.
tf.keras.callbacks.LambdaCallback(
on_epoch_end=lambda epoch, logs: print(f"Epoch {epoch}: val_acc = {logs['val_accuracy']:.4f}")
)
]
# NOTE(review): relies on model/x_train/y_train/x_val/y_val from the
# earlier training snippet.
history = model.fit(
x_train, y_train,
epochs=50,
validation_data=(x_val, y_val),
callbacks=callbacks
)
模型评估与预测 #
python
import tensorflow as tf
# NOTE(review): relies on a trained `model` and on x_test/y_test being
# defined elsewhere -- this snippet is not standalone.
# evaluate() returns the loss followed by each compiled metric.
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"测试损失: {test_loss:.4f}")
print(f"测试准确率: {test_acc:.4f}")
# predict() returns one row of class probabilities per sample
# (softmax output), shape (num_samples, num_classes).
predictions = model.predict(x_test)
print(f"预测形状: {predictions.shape}")
print(f"第一个预测: {predictions[0]}")
# argmax over the class axis converts probabilities to class IDs.
predicted_classes = tf.argmax(predictions, axis=1)
print(f"预测类别: {predicted_classes[:10]}")
下一步 #
现在你已经掌握了 Keras API,接下来学习 网络层,了解各种常用的神经网络层!
最后更新:2026-04-04