Network Layers #
The Layer Concept #
A layer (Layer) is the basic building block of a neural network. Each layer takes an input tensor, applies some transformation, and produces an output tensor. Keras ships a rich set of built-in layers covering the most common neural network structures.
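In code, a layer is simply a callable object: its weights are created lazily on the first call, based on the input shape. A minimal sketch (shapes chosen purely for illustration):
python
import tensorflow as tf

# A layer is a callable: tensor in, tensor out
dense = tf.keras.layers.Dense(4)
x = tf.random.normal([2, 8])   # batch of 2 samples, 8 features each
y = dense(x)                   # weights are created on this first call
print(y.shape)                 # (2, 4)
print(len(dense.weights))      # 2: kernel and bias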
Layer Categories #
text
┌──────────────────────────────────────────────────────────────┐
│                    Keras Layer Categories                     │
├──────────────────────────────────────────────────────────────┤
│                                                               │
│  Core:            Dense, Activation, Dropout, Flatten        │
│  Convolutional:   Conv1D, Conv2D, Conv3D, SeparableConv      │
│  Pooling:         MaxPooling, AveragePooling, GlobalPooling  │
│  Recurrent:       SimpleRNN, LSTM, GRU, Bidirectional        │
│  Attention:       Attention, MultiHeadAttention              │
│  Embedding:       Embedding                                  │
│  Normalization:   BatchNormalization, LayerNormalization     │
│  Regularization:  Dropout, SpatialDropout, GaussianDropout   │
│                                                               │
└──────────────────────────────────────────────────────────────┘
Core Layers #
Dense Layer #
The fully connected layer, the most basic neural network layer. It computes output = activation(dot(input, kernel) + bias).
python
import tensorflow as tf

# A Dense layer with its main constructor arguments
dense = tf.keras.layers.Dense(
    units=64,
    activation='relu',
    use_bias=True,
    kernel_initializer='glorot_uniform',
    bias_initializer='zeros',
    kernel_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
    kernel_constraint=None,
    bias_constraint=None
)

# Usage example
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Linear layer: no activation, here also without a bias
dense_linear = tf.keras.layers.Dense(64, use_bias=False)

# With L2 weight regularization
dense_reg = tf.keras.layers.Dense(
    64,
    kernel_regularizer=tf.keras.regularizers.l2(0.01)
)
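Since Dense is just a matrix multiply plus a bias, its output can be reproduced by hand. A minimal check (shapes illustrative):
python
import tensorflow as tf

# Verify that Dense computes activation(x @ kernel + bias)
x = tf.random.normal([2, 8])
dense = tf.keras.layers.Dense(4, activation='relu')
y = dense(x)

kernel, bias = dense.weights
y_manual = tf.nn.relu(tf.matmul(x, kernel) + bias)
print(tf.reduce_max(tf.abs(y - y_manual)).numpy())  # ~0.0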
Activation Layer #
A layer that applies an activation function to its input.
python
import tensorflow as tf

# Using standalone Activation layers
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, input_shape=(784,)),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Activation('softmax')
])

# Commonly used activation functions
activations = {
    'relu': tf.keras.layers.Activation('relu'),
    'sigmoid': tf.keras.layers.Activation('sigmoid'),
    'tanh': tf.keras.layers.Activation('tanh'),
    'softmax': tf.keras.layers.Activation('softmax'),
    'elu': tf.keras.layers.Activation('elu'),
    'selu': tf.keras.layers.Activation('selu'),
    'gelu': tf.keras.layers.Activation('gelu'),
    'swish': tf.keras.layers.Activation('swish')
}
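The standalone layer is equivalent to passing the same name via a layer's activation= argument, or calling the function from tf.keras.activations directly. A quick sketch:
python
import tensorflow as tf

x = tf.constant([[-1.0, 2.0]])

# As a standalone layer
print(tf.keras.layers.Activation('relu')(x).numpy())  # [[0. 2.]]

# As a plain function
print(tf.keras.activations.relu(x).numpy())           # [[0. 2.]]

# Fused into another layer via the activation= argument
dense = tf.keras.layers.Dense(2, activation='relu')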
Dropout Layer #
Randomly drops units during training to help prevent overfitting.
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Dropout parameters
dropout = tf.keras.layers.Dropout(
    rate=0.5,
    noise_shape=None,
    seed=42
)
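Dropout is only active when the layer is called in training mode; at inference it is an identity op. A minimal sketch:
python
import tensorflow as tf

drop = tf.keras.layers.Dropout(0.5)
x = tf.ones([1, 6])

# Training mode: roughly half the units are zeroed,
# the survivors are scaled by 1/(1 - rate) = 2
print(drop(x, training=True).numpy())   # e.g. [[0. 2. 2. 0. 0. 2.]]

# Inference mode: the input passes through unchanged
print(drop(x, training=False).numpy())  # [[1. 1. 1. 1. 1. 1.]]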
Flatten Layer #
Flattens multi-dimensional input into one dimension (the batch dimension is preserved).
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Inspect the shape before and after flattening
x = tf.random.normal([1, 28, 28, 1])
flatten = tf.keras.layers.Flatten()
print(f"Before: {x.shape}")           # (1, 28, 28, 1)
print(f"After:  {flatten(x).shape}")  # (1, 784)
Convolutional Layers #
Conv2D Layer #
The 2D convolution layer, used for image data.
python
import tensorflow as tf

# A basic Conv2D layer
conv = tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=3,
    strides=1,
    padding='same',
    activation='relu',
    input_shape=(28, 28, 1)
)

# A small CNN
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10, activation='softmax')
])
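With stride 1, padding='same' preserves the spatial size while padding='valid' shrinks it by kernel_size - 1. A quick shape check:
python
import tensorflow as tf

x = tf.random.normal([1, 28, 28, 1])

same = tf.keras.layers.Conv2D(8, 3, padding='same')
valid = tf.keras.layers.Conv2D(8, 3, padding='valid')

print(same(x).shape)   # (1, 28, 28, 8)
print(valid(x).shape)  # (1, 26, 26, 8)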
Conv1D Layer #
The 1D convolution layer, used for sequence data.
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=100),
    tf.keras.layers.Conv1D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(),
    tf.keras.layers.Conv1D(128, 3, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(10, activation='softmax')
])
Depthwise Separable Convolutions #
A separable convolution factors a standard convolution into a per-channel (depthwise) convolution followed by a pointwise 1x1 convolution, greatly reducing parameters and compute.
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.SeparableConv2D(32, 3, activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.SeparableConv2D(64, 3, activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Depthwise convolution followed by a pointwise (1x1) convolution
model = tf.keras.Sequential([
    tf.keras.layers.DepthwiseConv2D(3, padding='same', input_shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(64, 1, activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10, activation='softmax')
])
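The parameter savings are easy to verify by counting weights. A sketch comparing a regular and a separable 3x3 convolution over 64 input channels:
python
import tensorflow as tf

x = tf.random.normal([1, 32, 32, 64])

conv = tf.keras.layers.Conv2D(128, 3, padding='same')
sep = tf.keras.layers.SeparableConv2D(128, 3, padding='same')
_ = conv(x)  # call once so the weights are built
_ = sep(x)

print(conv.count_params())  # 73856  (3*3*64*128 + 128)
print(sep.count_params())   # 8896   (3*3*64 + 64*128 + 128)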
Pooling Layers #
Max Pooling #
python
import tensorflow as tf

# MaxPooling2D
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
])

# Global max pooling
global_max = tf.keras.layers.GlobalMaxPooling2D()
Average Pooling #
python
import tensorflow as tf

# AveragePooling2D
avg_pool = tf.keras.layers.AveragePooling2D(pool_size=2, strides=2)

# Global average pooling
global_avg = tf.keras.layers.GlobalAveragePooling2D()

# Usage example
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(64, 3, activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10, activation='softmax')
])
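Regular pooling downsamples the spatial grid, while global pooling collapses it into a single value per channel, which is why it can replace Flatten before a classifier head. A shape sketch:
python
import tensorflow as tf

x = tf.random.normal([1, 7, 7, 64])

print(tf.keras.layers.MaxPooling2D(2)(x).shape)           # (1, 3, 3, 64)
print(tf.keras.layers.GlobalAveragePooling2D()(x).shape)  # (1, 64)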
Recurrent Layers #
LSTM Layer #
The Long Short-Term Memory network.
python
import tensorflow as tf

# A basic LSTM
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=100),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Stacked LSTMs
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=100),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Bidirectional LSTM
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=100),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# LSTM parameters
lstm = tf.keras.layers.LSTM(
    units=64,
    activation='tanh',
    recurrent_activation='sigmoid',
    return_sequences=False,
    return_state=False,
    dropout=0.0,
    recurrent_dropout=0.0
)
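return_sequences and return_state control what the layer emits. A shape sketch with an illustrative (batch=2, time=5, features=8) input:
python
import tensorflow as tf

x = tf.random.normal([2, 5, 8])  # (batch, timesteps, features)

# Last output only
print(tf.keras.layers.LSTM(16)(x).shape)                         # (2, 16)

# Full output sequence (needed when stacking recurrent layers)
print(tf.keras.layers.LSTM(16, return_sequences=True)(x).shape)  # (2, 5, 16)

# Output plus final hidden and cell states
out, h, c = tf.keras.layers.LSTM(16, return_state=True)(x)
print(out.shape, h.shape, c.shape)                               # (2, 16) (2, 16) (2, 16)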
GRU Layer #
The Gated Recurrent Unit.
python
import tensorflow as tf

# A basic GRU
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=100),
    tf.keras.layers.GRU(64),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Bidirectional GRU
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=100),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64)),
    tf.keras.layers.Dense(10, activation='softmax')
])
SimpleRNN Layer #
A simple (vanilla) recurrent neural network.
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=100),
    tf.keras.layers.SimpleRNN(64),
    tf.keras.layers.Dense(10, activation='softmax')
])
Attention Layers #
Attention Layer #
Dot-product attention over a query sequence and a value sequence.
python
import tensorflow as tf

# Basic attention
query = tf.keras.Input(shape=(None, 64))
value = tf.keras.Input(shape=(None, 64))
attention = tf.keras.layers.Attention()
output = attention([query, value])
model = tf.keras.Model([query, value], output)
MultiHeadAttention Layer #
The multi-head attention mechanism.
python
import tensorflow as tf

# Multi-head self-attention
inputs = tf.keras.Input(shape=(None, 64))
mha = tf.keras.layers.MultiHeadAttention(
    num_heads=8,
    key_dim=64,
    dropout=0.1
)
output = mha(inputs, inputs)  # query and value are the same: self-attention
model = tf.keras.Model(inputs, output)

# Example Transformer block
def transformer_block(x, num_heads, key_dim, ff_dim, dropout=0.1):
    attn_output = tf.keras.layers.MultiHeadAttention(
        num_heads=num_heads,
        key_dim=key_dim,
        dropout=dropout
    )(x, x)
    attn_output = tf.keras.layers.Dropout(dropout)(attn_output)
    out1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x + attn_output)
    ffn_output = tf.keras.layers.Dense(ff_dim, activation='relu')(out1)
    # Project back to the input dimension so the residual addition is valid
    ffn_output = tf.keras.layers.Dense(x.shape[-1])(ffn_output)
    ffn_output = tf.keras.layers.Dropout(dropout)(ffn_output)
    return tf.keras.layers.LayerNormalization(epsilon=1e-6)(out1 + ffn_output)
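A usage sketch for the block above, stacking it on an embedded sequence (all dimensions here are illustrative):
python
import tensorflow as tf

# Assumes transformer_block from the previous example is in scope
inputs = tf.keras.Input(shape=(100,), dtype='int32')
x = tf.keras.layers.Embedding(10000, 64)(inputs)
x = transformer_block(x, num_heads=4, key_dim=16, ff_dim=128)
x = tf.keras.layers.GlobalAveragePooling1D()(x)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)
model = tf.keras.Model(inputs, outputs)
model.summary()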
Embedding Layers #
Embedding Layer #
Maps integer token indices to dense, trainable vectors.
python
import tensorflow as tf

# A basic embedding layer
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=10000,   # vocabulary size
        output_dim=128,    # embedding dimension
        input_length=100   # sequence length
    ),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Pre-trained embeddings (frozen)
embedding_matrix = tf.random.normal([10000, 128]).numpy()  # stand-in for real pre-trained vectors
embedding = tf.keras.layers.Embedding(
    input_dim=10000,
    output_dim=128,
    weights=[embedding_matrix],
    trainable=False
)
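An embedding is just a trainable lookup table: integer indices in, vectors out. A quick sketch:
python
import tensorflow as tf

emb = tf.keras.layers.Embedding(input_dim=1000, output_dim=8)
tokens = tf.constant([[3, 41, 7, 0]])  # (batch=1, sequence=4) of token ids
vectors = emb(tokens)
print(vectors.shape)                   # (1, 4, 8)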
Normalization Layers #
BatchNormalization #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, input_shape=(224, 224, 3)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Conv2D(64, 3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
])

# BatchNorm parameters
bn = tf.keras.layers.BatchNormalization(
    axis=-1,
    momentum=0.99,
    epsilon=0.001,
    center=True,
    scale=True
)
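Like Dropout, BatchNormalization behaves differently in training and inference: in training it normalizes with the current batch statistics and updates its moving averages; at inference it uses the stored moving statistics. A minimal sketch:
python
import tensorflow as tf

bn = tf.keras.layers.BatchNormalization()
x = tf.random.normal([32, 4]) * 5.0 + 3.0  # batch with non-zero mean and large std

y_train = bn(x, training=True)    # normalized with batch statistics
y_infer = bn(x, training=False)   # normalized with moving statistics

print(tf.math.reduce_std(y_train, axis=0).numpy())  # ~1.0 per feature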
LayerNormalization #
python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, input_shape=(784,)),
    tf.keras.layers.LayerNormalization(),
    tf.keras.layers.Activation('relu'),
])

# LayerNorm parameters
ln = tf.keras.layers.LayerNormalization(
    axis=-1,
    epsilon=0.001,
    center=True,
    scale=True
)
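Unlike BatchNormalization, LayerNormalization normalizes across the features of each individual sample, so it does not depend on the batch and behaves the same in training and inference. A sketch:
python
import tensorflow as tf

ln = tf.keras.layers.LayerNormalization()
x = tf.constant([[1.0, 2.0, 3.0],
                 [10.0, 20.0, 30.0]])  # two samples on very different scales

y = ln(x)
print(tf.reduce_mean(y, axis=-1).numpy())      # ~[0. 0.]  per-sample mean
print(tf.math.reduce_std(y, axis=-1).numpy())  # ~[1. 1.]  per-sample std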
Next Steps #
Now that you have a handle on the various layer types, move on to Model Building to learn how to combine layers into complete models!
Last updated: 2026-04-04