自定义层 #
Layer 类概述 #
Keras Layer 是所有层的基础类。通过继承 Layer 类,可以创建自定义的网络层,实现任意复杂的计算逻辑。
Layer 的核心方法 #
text
┌─────────────────────────────────────────────────────────────┐
│ Layer 核心方法 │
├─────────────────────────────────────────────────────────────┤
│ │
│ __init__() │
│ ├── 初始化层配置 │
│ ├── 不依赖输入形状 │
│ └── 定义超参数 │
│ │
│ build(input_shape) │
│ ├── 根据输入形状创建权重 │
│ ├── 延迟创建,首次调用时执行 │
│ └── 使用 add_weight() 添加权重 │
│ │
│ call(inputs) │
│ ├── 定义前向计算逻辑 │
│ ├── 必须实现 │
│ └── 支持 training 参数 │
│ │
└─────────────────────────────────────────────────────────────┘
基本自定义层 #
简单全连接层 #
python
import tensorflow as tf
class SimpleDense(tf.keras.layers.Layer):
    """A minimal fully connected layer: output = inputs @ w + b."""

    def __init__(self, units=32):
        super().__init__()
        # Output width; the weights themselves are created lazily in build().
        self.units = units

    def build(self, input_shape):
        # Called on the first invocation, once the input feature size is known.
        in_features = input_shape[-1]
        self.w = self.add_weight(
            name='kernel',
            shape=(in_features, self.units),
            initializer='glorot_uniform',
            trainable=True,
        )
        self.b = self.add_weight(
            name='bias',
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        # Plain affine transform — no activation.
        return tf.matmul(inputs, self.w) + self.b


layer = SimpleDense(64)
x = tf.random.normal([2, 784])
y = layer(x)  # first call triggers build() with shape [2, 784]
print(f"输出形状: {y.shape}")
print(f"权重数量: {len(layer.weights)}")
带激活函数的层 #
python
import tensorflow as tf
class DenseWithActivation(tf.keras.layers.Layer):
    """Fully connected layer with a configurable activation function.

    Args:
        units: Dimensionality of the output space.
        activation: Identifier accepted by ``tf.keras.activations.get``
            (a name string, a callable, or None for identity).
    """

    def __init__(self, units, activation='relu'):
        super().__init__()
        self.units = units
        # Resolve the identifier once at construction time.
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # Pass `name` as a keyword: in Keras 3 the first positional
        # parameter of add_weight() is `shape`, so a positional name
        # string would be misinterpreted.
        self.kernel = self.add_weight(
            name='kernel',
            shape=[input_shape[-1], self.units],
            initializer='glorot_uniform',
            trainable=True,
        )
        self.bias = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        # Linear transform followed by the activation.
        output = tf.matmul(inputs, self.kernel) + self.bias
        return self.activation(output)


layer = DenseWithActivation(64, activation='relu')
训练/推理模式 #
python
import tensorflow as tf
class DropoutLayer(tf.keras.layers.Layer):
    """Applies dropout while training; passes inputs through at inference."""

    def __init__(self, rate):
        super().__init__()
        # Fraction of input units to drop during training.
        self.rate = rate

    def call(self, inputs, training=False):
        # Inference path: identity.
        if not training:
            return inputs
        # Training path: randomly zero elements (tf.nn.dropout rescales
        # the survivors by 1 / (1 - rate)).
        return tf.nn.dropout(inputs, rate=self.rate)


layer = DropoutLayer(0.5)
x = tf.ones([2, 10])
print(f"推理模式: {layer(x, training=False)}")
print(f"训练模式: {layer(x, training=True)}")
层嵌套 #
python
import tensorflow as tf
class MLPBlock(tf.keras.layers.Layer):
    """A stack of Dense+ReLU layers, each followed by shared dropout.

    Args:
        units_list: Output width of each Dense layer, in order.
    """

    def __init__(self, units_list):
        super().__init__()
        # Sub-layers assigned to attributes (including inside a plain
        # Python list) are tracked automatically by Keras.
        self.dense_layers = [
            tf.keras.layers.Dense(units, activation='relu')
            for units in units_list
        ]
        # One Dropout instance reused after every Dense layer.
        self.dropout = tf.keras.layers.Dropout(0.2)

    def call(self, inputs, training=False):
        x = inputs
        for dense in self.dense_layers:
            x = dense(x)
            # Forward the training flag so dropout is off at inference.
            x = self.dropout(x, training=training)
        return x


mlp = MLPBlock([64, 32, 16])
x = tf.random.normal([2, 128])
y = mlp(x)
print(f"输出形状: {y.shape}")
# NOTE: `.layers` is a property of Model, not of Layer, so count the
# tracked Dense sub-layers directly (the Dropout layer is separate).
print(f"子层数量: {len(mlp.dense_layers)}")
自定义卷积层 #
python
import tensorflow as tf
class CustomConv2D(tf.keras.layers.Layer):
    """2-D convolution implemented directly on top of tf.nn.conv2d.

    Args:
        filters: Number of output channels.
        kernel_size: Side length of the square convolution kernel.
        strides: Stride applied along both spatial dimensions.
        padding: 'same' or 'valid' (case-insensitive).
        activation: Identifier accepted by tf.keras.activations.get.
    """

    def __init__(self, filters, kernel_size, strides=1, padding='same', activation='relu'):
        super().__init__()
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        # tf.nn.conv2d expects the padding mode in upper case.
        self.padding = padding.upper()
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # Assumes channels-last (NHWC) inputs: the channel count is the
        # last dimension.
        input_channels = input_shape[-1]
        # `name` is passed as a keyword: Keras 3's add_weight() takes
        # `shape` as its first positional parameter, so a positional
        # name string would break there.
        self.kernel = self.add_weight(
            name='kernel',
            shape=[self.kernel_size, self.kernel_size, input_channels, self.filters],
            initializer='glorot_uniform',
            trainable=True,
        )
        self.bias = self.add_weight(
            name='bias',
            shape=[self.filters],
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        # NHWC stride layout: batch and channel strides stay at 1.
        outputs = tf.nn.conv2d(
            inputs,
            self.kernel,
            strides=[1, self.strides, self.strides, 1],
            padding=self.padding,
        )
        outputs = outputs + self.bias
        return self.activation(outputs)


conv = CustomConv2D(32, 3)
x = tf.random.normal([1, 28, 28, 3])
y = conv(x)
print(f"输出形状: {y.shape}")
自定义注意力层 #
python
import tensorflow as tf
class SelfAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product self-attention.

    Args:
        embed_dim: Total embedding dimension of inputs and outputs.
        num_heads: Number of attention heads; must divide embed_dim.

    Raises:
        ValueError: If embed_dim is not divisible by num_heads.
    """

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        # Validate BEFORE deriving head_dim, and raise instead of
        # assert so the check survives `python -O`.
        if embed_dim % num_heads != 0:
            raise ValueError("embed_dim 必须能被 num_heads 整除")
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        # Separate Q/K/V projections plus the output projection that
        # mixes the heads back together.
        self.query_dense = tf.keras.layers.Dense(embed_dim)
        self.key_dense = tf.keras.layers.Dense(embed_dim)
        self.value_dense = tf.keras.layers.Dense(embed_dim)
        self.combine_heads = tf.keras.layers.Dense(embed_dim)

    def separate_heads(self, x, batch_size):
        # (batch, seq, embed) -> (batch, heads, seq, head_dim)
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.head_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        # Scaled dot-product attention: softmax(Q K^T / sqrt(d)) V.
        attention_scores = tf.matmul(query, key, transpose_b=True)
        attention_scores = attention_scores / tf.math.sqrt(tf.cast(self.head_dim, tf.float32))
        attention_weights = tf.nn.softmax(attention_scores, axis=-1)
        output = tf.matmul(attention_weights, value)
        # (batch, heads, seq, head_dim) -> (batch, seq, embed)
        output = tf.transpose(output, perm=[0, 2, 1, 3])
        output = tf.reshape(output, (batch_size, -1, self.embed_dim))
        return self.combine_heads(output)


attention = SelfAttention(64, 4)
x = tf.random.normal([2, 10, 64])
y = attention(x)
print(f"输出形状: {y.shape}")
自定义归一化层 #
python
import tensorflow as tf
class LayerNormalization(tf.keras.layers.Layer):
    """Normalizes the last axis to zero mean / unit variance, then
    applies a learned scale (gamma) and offset (beta).

    Args:
        epsilon: Small constant added to the variance for numerical
            stability.
    """

    def __init__(self, epsilon=1e-6, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        # One gamma/beta value per feature on the last axis. `name` is
        # passed as a keyword because in Keras 3 the first positional
        # parameter of add_weight() is `shape`.
        self.gamma = self.add_weight(
            name='gamma',
            shape=input_shape[-1:],
            initializer='ones',
            trainable=True,
        )
        self.beta = self.add_weight(
            name='beta',
            shape=input_shape[-1:],
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        # Per-sample statistics over the feature (last) axis.
        mean, variance = tf.nn.moments(inputs, axes=-1, keepdims=True)
        normalized = (inputs - mean) / tf.sqrt(variance + self.epsilon)
        return self.gamma * normalized + self.beta


layer_norm = LayerNormalization()
x = tf.random.normal([2, 10, 64])
y = layer_norm(x)
print(f"输出形状: {y.shape}")
层序列化 #
python
import tensorflow as tf
class CustomDense(tf.keras.layers.Layer):
    """Serializable dense layer: implements get_config()/from_config()
    so it round-trips through Keras model saving and loading.

    Args:
        units: Dimensionality of the output space.
        activation: Identifier accepted by tf.keras.activations.get.
        **kwargs: Standard Layer keyword arguments (e.g. `name`).
    """

    def __init__(self, units, activation='relu', **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # `name` passed as keyword for Keras 3 compatibility, where
        # add_weight()'s first positional parameter is `shape`.
        self.kernel = self.add_weight(
            name='kernel',
            shape=[input_shape[-1], self.units],
            initializer='glorot_uniform',
            trainable=True,
        )
        self.bias = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        output = tf.matmul(inputs, self.kernel) + self.bias
        return self.activation(output)

    def get_config(self):
        # Start from the base config (name, dtype, ...) and add the
        # constructor arguments needed to rebuild this layer.
        config = super().get_config()
        config.update({
            'units': self.units,
            'activation': tf.keras.activations.serialize(self.activation),
        })
        return config

    @classmethod
    def from_config(cls, config):
        # Config keys line up with __init__'s signature, so a plain
        # keyword expansion reconstructs the layer.
        return cls(**config)


layer = CustomDense(64, activation='relu')
config = layer.get_config()
new_layer = CustomDense.from_config(config)
下一步 #
现在你已经掌握了自定义层,接下来学习 自定义训练,了解如何实现完全自定义的训练循环!
最后更新:2026-04-04