自定义层 #

Layer 类概述 #

Keras Layer 是所有层的基础类。通过继承 Layer 类,可以创建自定义的网络层,实现任意复杂的计算逻辑。

Layer 的核心方法 #

text
┌─────────────────────────────────────────────────────────────┐
│                    Layer 核心方法                            │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  __init__()                                                 │
│  ├── 初始化层配置                                           │
│  ├── 不依赖输入形状                                         │
│  └── 定义超参数                                             │
│                                                             │
│  build(input_shape)                                         │
│  ├── 根据输入形状创建权重                                   │
│  ├── 延迟创建,首次调用时执行                               │
│  └── 使用 add_weight() 添加权重                             │
│                                                             │
│  call(inputs)                                               │
│  ├── 定义前向计算逻辑                                       │
│  ├── 必须实现                                               │
│  └── 支持 training 参数                                     │
│                                                             │
└─────────────────────────────────────────────────────────────┘

基本自定义层 #

简单全连接层 #

python
import tensorflow as tf

class SimpleDense(tf.keras.layers.Layer):
    """A minimal fully connected layer computing ``y = x @ w + b``.

    Args:
        units: Output dimensionality of the layer.
    """

    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        # Weights are created lazily here, on the first call, once the
        # input feature size (last axis of the input) is known.
        in_features = input_shape[-1]
        self.w = self.add_weight(
            name='kernel',
            shape=(in_features, self.units),
            initializer='glorot_uniform',
            trainable=True,
        )
        self.b = self.add_weight(
            name='bias',
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        # Plain affine transform; no activation is applied.
        return tf.matmul(inputs, self.w) + self.b

layer = SimpleDense(64)
x = tf.random.normal([2, 784])
y = layer(x)
print(f"输出形状: {y.shape}")
print(f"权重数量: {len(layer.weights)}")

带激活函数的层 #

python
import tensorflow as tf

class DenseWithActivation(tf.keras.layers.Layer):
    """A dense layer with a configurable activation applied to its output.

    Args:
        units: Output dimensionality.
        activation: Activation identifier accepted by
            ``tf.keras.activations.get`` (string, callable, or None).
    """

    def __init__(self, units, activation='relu'):
        super().__init__()
        self.units = units
        # Resolve the activation once; `get` accepts names, callables, None.
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # Pass `name` as a keyword: in Keras 3 the first positional
        # parameter of add_weight() is `shape`, so a positional name breaks.
        self.kernel = self.add_weight(
            name='kernel',
            shape=[input_shape[-1], self.units],
            initializer='glorot_uniform'
        )
        self.bias = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer='zeros'
        )

    def call(self, inputs):
        output = tf.matmul(inputs, self.kernel) + self.bias
        return self.activation(output)

layer = DenseWithActivation(64, activation='relu')

训练/推理模式 #

python
import tensorflow as tf

class DropoutLayer(tf.keras.layers.Layer):
    """Dropout that is only active when called with ``training=True``.

    Args:
        rate: Fraction of input units to drop during training.
    """

    def __init__(self, rate):
        super().__init__()
        self.rate = rate

    def call(self, inputs, training=False):
        # Inference mode is a pure pass-through; training mode masks
        # random units and rescales the survivors.
        if not training:
            return inputs
        return tf.nn.dropout(inputs, rate=self.rate)

layer = DropoutLayer(0.5)
x = tf.ones([2, 10])

print(f"推理模式: {layer(x, training=False)}")
print(f"训练模式: {layer(x, training=True)}")

层嵌套 #

python
import tensorflow as tf

class MLPBlock(tf.keras.layers.Layer):
    """A stack of ReLU Dense layers, each followed by shared Dropout.

    Args:
        units_list: Sequence of ints; one Dense layer is created per entry.
    """

    def __init__(self, units_list):
        super().__init__()
        self.dense_layers = [
            tf.keras.layers.Dense(units, activation='relu')
            for units in units_list
        ]
        # One Dropout instance is reused after every Dense layer.
        self.dropout = tf.keras.layers.Dropout(0.2)

    def call(self, inputs, training=False):
        x = inputs
        for dense in self.dense_layers:
            x = dense(x)
            # Forward the training flag so dropout is disabled at inference.
            x = self.dropout(x, training=training)
        return x

mlp = MLPBlock([64, 32, 16])
x = tf.random.normal([2, 128])
y = mlp(x)
print(f"输出形状: {y.shape}")
# `.layers` is a Model property; a plain Layer subclass does not reliably
# expose it (AttributeError on several TF/Keras versions). Count the
# tracked sublayers explicitly instead (+1 for the shared Dropout).
print(f"子层数量: {len(mlp.dense_layers) + 1}")

自定义卷积层 #

python
import tensorflow as tf

class CustomConv2D(tf.keras.layers.Layer):
    """A 2-D convolution layer built directly on ``tf.nn.conv2d``.

    Args:
        filters: Number of output channels.
        kernel_size: Side length of the square convolution kernel.
        strides: Stride applied to both spatial dimensions.
        padding: 'same' or 'valid' (case-insensitive).
        activation: Activation identifier accepted by
            ``tf.keras.activations.get``.
    """

    def __init__(self, filters, kernel_size, strides=1, padding='same', activation='relu'):
        super().__init__()
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        # tf.nn.conv2d expects the padding mode in upper case ('SAME'/'VALID').
        self.padding = padding.upper()
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # Input is assumed NHWC, so channels are the last axis.
        input_channels = input_shape[-1]
        # Pass `name` as a keyword: in Keras 3 the first positional
        # parameter of add_weight() is `shape`, so a positional name breaks.
        self.kernel = self.add_weight(
            name='kernel',
            shape=[self.kernel_size, self.kernel_size, input_channels, self.filters],
            initializer='glorot_uniform'
        )
        self.bias = self.add_weight(
            name='bias',
            shape=[self.filters],
            initializer='zeros'
        )

    def call(self, inputs):
        outputs = tf.nn.conv2d(
            inputs,
            self.kernel,
            strides=[1, self.strides, self.strides, 1],
            padding=self.padding
        )
        # Bias broadcasts over batch and spatial dimensions (NHWC layout).
        outputs = outputs + self.bias
        return self.activation(outputs)

conv = CustomConv2D(32, 3)
x = tf.random.normal([1, 28, 28, 3])
y = conv(x)
print(f"输出形状: {y.shape}")

自定义注意力层 #

python
import tensorflow as tf

class SelfAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product self-attention.

    Args:
        embed_dim: Total embedding dimension of inputs and outputs.
        num_heads: Number of attention heads; must divide ``embed_dim``.

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        # Validate with an explicit exception, not `assert`: asserts are
        # stripped when Python runs with -O, silently skipping the check.
        if embed_dim % num_heads != 0:
            raise ValueError("embed_dim 必须能被 num_heads 整除")

        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        self.query_dense = tf.keras.layers.Dense(embed_dim)
        self.key_dense = tf.keras.layers.Dense(embed_dim)
        self.value_dense = tf.keras.layers.Dense(embed_dim)
        self.combine_heads = tf.keras.layers.Dense(embed_dim)

    def separate_heads(self, x, batch_size):
        # (batch, seq, embed) -> (batch, heads, seq, head_dim)
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.head_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]

        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)

        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)

        # Scaled dot-product attention: softmax(QK^T / sqrt(d_k)) V.
        attention_scores = tf.matmul(query, key, transpose_b=True)
        attention_scores = attention_scores / tf.math.sqrt(tf.cast(self.head_dim, tf.float32))
        attention_weights = tf.nn.softmax(attention_scores, axis=-1)

        output = tf.matmul(attention_weights, value)
        # Merge heads back: (batch, heads, seq, head_dim) -> (batch, seq, embed).
        output = tf.transpose(output, perm=[0, 2, 1, 3])
        output = tf.reshape(output, (batch_size, -1, self.embed_dim))

        return self.combine_heads(output)

attention = SelfAttention(64, 4)
x = tf.random.normal([2, 10, 64])
y = attention(x)
print(f"输出形状: {y.shape}")

自定义归一化层 #

python
import tensorflow as tf

class LayerNormalization(tf.keras.layers.Layer):
    """Layer normalization over the last axis, with learnable scale/shift.

    Args:
        epsilon: Small constant added to the variance for numerical stability.
        **kwargs: Forwarded to the base ``Layer`` (e.g. ``name``).
    """

    def __init__(self, epsilon=1e-6, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        # Pass `name` as a keyword: in Keras 3 the first positional
        # parameter of add_weight() is `shape`, so a positional name breaks.
        self.gamma = self.add_weight(
            name='gamma',
            shape=input_shape[-1:],
            initializer='ones',
            trainable=True
        )
        self.beta = self.add_weight(
            name='beta',
            shape=input_shape[-1:],
            initializer='zeros',
            trainable=True
        )

    def call(self, inputs):
        # Normalize each position independently across the feature axis,
        # then apply the learned affine transform.
        mean, variance = tf.nn.moments(inputs, axes=-1, keepdims=True)
        normalized = (inputs - mean) / tf.sqrt(variance + self.epsilon)
        return self.gamma * normalized + self.beta

layer_norm = LayerNormalization()
x = tf.random.normal([2, 10, 64])
y = layer_norm(x)
print(f"输出形状: {y.shape}")

层序列化 #

python
import tensorflow as tf

class CustomDense(tf.keras.layers.Layer):
    """A serializable dense layer with activation.

    Implements ``get_config``/``from_config`` so the layer round-trips
    through Keras model saving and loading.

    Args:
        units: Output dimensionality.
        activation: Activation identifier accepted by
            ``tf.keras.activations.get``.
        **kwargs: Forwarded to the base ``Layer`` (e.g. ``name``).
    """

    def __init__(self, units, activation='relu', **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # Pass `name` as a keyword: in Keras 3 the first positional
        # parameter of add_weight() is `shape`, so a positional name breaks.
        self.kernel = self.add_weight(
            name='kernel',
            shape=[input_shape[-1], self.units],
            initializer='glorot_uniform'
        )
        self.bias = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer='zeros'
        )

    def call(self, inputs):
        output = tf.matmul(inputs, self.kernel) + self.bias
        return self.activation(output)

    def get_config(self):
        # Serialize the resolved activation back to a portable identifier.
        config = super().get_config()
        config.update({
            'units': self.units,
            'activation': tf.keras.activations.serialize(self.activation)
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)

layer = CustomDense(64, activation='relu')
config = layer.get_config()
new_layer = CustomDense.from_config(config)

下一步 #

现在你已经掌握了自定义层,接下来学习 自定义训练,了解如何实现完全自定义的训练循环!

最后更新:2026-04-04