变量与状态 #
什么是 Variable? #
Variable(变量)是 TensorFlow 中用于表示可变状态的类。与张量(Tensor)不同,Variable 的值可以在计算过程中被修改,这使得它成为存储和更新模型参数的理想选择。
Variable vs Tensor #
text
┌─────────────────────────────────────────────────────────────┐
│ Variable vs Tensor │
├─────────────────────────────────────────────────────────────┤
│ │
│ Tensor: │
│ - 不可变(immutable) │
│ - 值一旦创建不能改变 │
│ - 用于存储中间计算结果 │
│ │
│ Variable: │
│ - 可变(mutable) │
│ - 值可以通过 assign 等方法更新 │
│ - 用于存储模型参数(权重、偏置) │
│ │
└─────────────────────────────────────────────────────────────┘
创建变量 #
基本创建 #
python
import tensorflow as tf

# A Variable can be seeded directly from a Python list.
var_from_list = tf.Variable([1, 2, 3, 4])
print(f"变量: {var_from_list}")
print(f"值: {var_from_list.numpy()}")

# The dtype can be pinned explicitly instead of being inferred.
var_float = tf.Variable([1, 2, 3], dtype=tf.float32)
print(f"数据类型: {var_float.dtype}")

# An explicit name shows up in checkpoints and debugging output.
var_named = tf.Variable([1, 2, 3], name='my_variable')
print(f"名称: {var_named.name}")

# An existing constant tensor works as an initial value too.
seed_tensor = tf.constant([[1, 2], [3, 4]])
var_from_tensor = tf.Variable(seed_tensor)
print(f"从张量创建:\n{var_from_tensor.numpy()}")
使用初始化器 #
python
import tensorflow as tf

# All-zero initial value.
zeros_var = tf.Variable(tf.zeros([3, 4]))
print(f"zeros 初始化形状: {zeros_var.shape}")

# All-one initial value.
ones_var = tf.Variable(tf.ones([2, 3]))
print(f"ones 初始化形状: {ones_var.shape}")

# Gaussian random initial value.
normal_var = tf.Variable(tf.random.normal([3, 3], mean=0, stddev=1))
print(f"随机初始化:\n{normal_var.numpy()}")

# Glorot-uniform initializer, a common default for neural-network weights;
# calling the initializer with a shape produces the initial tensor.
glorot_init = tf.keras.initializers.GlorotUniform()
glorot_var = tf.Variable(glorot_init(shape=[3, 4]))
print(f"Glorot 初始化:\n{glorot_var.numpy()}")
常用初始化器 #
python
import tensorflow as tf

# Glorot / Xavier family (variance scaled by fan_in + fan_out).
glorot_u = tf.keras.initializers.GlorotUniform()
glorot_n = tf.keras.initializers.GlorotNormal()

# He family -- typically paired with ReLU activations.
he_u = tf.keras.initializers.HeUniform()
he_n = tf.keras.initializers.HeNormal()

# LeCun family -- typically paired with SELU activations.
lecun_n = tf.keras.initializers.LecunNormal()

# Constant fills.
fill_zeros = tf.keras.initializers.Zeros()
fill_ones = tf.keras.initializers.Ones()
fill_half = tf.keras.initializers.Constant(0.5)

# Plain random draws.
rand_n = tf.keras.initializers.RandomNormal(mean=0, stddev=0.05)
rand_u = tf.keras.initializers.RandomUniform(minval=-0.05, maxval=0.05)

# Normal distribution with the tails cut off.
trunc_n = tf.keras.initializers.TruncatedNormal(mean=0, stddev=0.05)

# Orthogonal matrix initialization.
ortho = tf.keras.initializers.Orthogonal()

# Usage: call the initializer with a shape to get the starting tensor.
v = tf.Variable(he_n(shape=[128, 64]))
print(f"He 初始化变量形状: {v.shape}")
变量属性 #
python
import tensorflow as tf

weights = tf.Variable([[1, 2, 3], [4, 5, 6]], dtype=tf.float32, name='weights')

# Inspect the basic attributes of a Variable.
print(f"值:\n{weights.numpy()}")
print(f"形状: {weights.shape}")
print(f"数据类型: {weights.dtype}")
print(f"名称: {weights.name}")
print(f"设备: {weights.device}")

# trainable controls whether GradientTape tracks this variable by default.
print(f"可训练: {weights.trainable}")

# A Variable converts to a read-only Tensor on demand.
print(f"作为张量: {tf.convert_to_tensor(weights)}")
变量更新 #
assign 方法 #
python
import tensorflow as tf

counter = tf.Variable([1, 2, 3])

# assign replaces the whole value in place.
counter.assign([4, 5, 6])
print(f"assign 后: {counter.numpy()}")

# assign_add performs an in-place elementwise addition.
counter.assign_add([1, 1, 1])
print(f"assign_add 后: {counter.numpy()}")

# assign_sub performs an in-place elementwise subtraction.
counter.assign_sub([1, 1, 1])
print(f"assign_sub 后: {counter.numpy()}")
索引更新 #
python
import tensorflow as tf

vec = tf.Variable([0, 0, 0, 0, 0])

# Single elements and slices support in-place assignment.
vec[0].assign(1)
vec[1:3].assign([2, 3])
print(f"索引更新后: {vec.numpy()}")

# Row-wise updates on a matrix variable.
mat = tf.Variable(tf.zeros([3, 3]))
mat[0, :].assign([1, 2, 3])
mat[1, :].assign([4, 5, 6])
print(f"矩阵更新后:\n{mat.numpy()}")

# scatter_nd_update writes values at explicit index coordinates;
# each index must be a coordinate vector, hence the expand_dims.
target = tf.Variable([0, 0, 0, 0, 0])
positions = tf.constant([0, 2, 4])
new_values = tf.constant([1, 2, 3])
target.scatter_nd_update(tf.expand_dims(positions, 1), new_values)
print(f"scatter 更新后: {target.numpy()}")
在函数中更新 #
python
import tensorflow as tf

step_counter = tf.Variable(0.0)

# Variables captured by a tf.function can be mutated inside the traced graph.
@tf.function
def increment():
    step_counter.assign_add(1.0)
    return step_counter

print(f"初始值: {step_counter.numpy()}")
increment()
print(f"调用后: {step_counter.numpy()}")
increment()
print(f"再次调用: {step_counter.numpy()}")

# GradientTape records operations on trainable variables so the
# gradient can drive an in-place update (one gradient-descent step).
param = tf.Variable(2.0)

def compute_loss():
    return param ** 2

with tf.GradientTape() as tape:
    loss = compute_loss()
grad = tape.gradient(loss, param)
print(f"\n梯度: {grad.numpy()}")
param.assign_sub(0.1 * grad)  # gradient descent: p <- p - lr * dL/dp
print(f"更新后: {param.numpy()}")
变量与梯度 #
计算梯度 #
python
import tensorflow as tf

# Single-variable gradient: d(w^2)/dw = 2w.
w = tf.Variable([[1.0]])
with tf.GradientTape() as tape:
    loss = w ** 2
grad = tape.gradient(loss, w)
print(f"梯度: {grad.numpy()}")

# Passing a list of variables returns one gradient per variable.
w1 = tf.Variable([[1.0]])
w2 = tf.Variable([[2.0]])
with tf.GradientTape() as tape:
    loss = w1 ** 2 + w2 ** 3
grads = tape.gradient(loss, [w1, w2])
print(f"w1 梯度: {grads[0].numpy()}")
print(f"w2 梯度: {grads[1].numpy()}")
梯度更新 #
python
import tensorflow as tf

# Parameters of a tiny linear model: y = x @ W + b.
weights = tf.Variable(tf.random.normal([10, 5]))
bias = tf.Variable(tf.zeros([5]))
learning_rate = 0.01

def model(x):
    """Affine transform of a (batch, 10) input to (batch, 5)."""
    return tf.matmul(x, weights) + bias

def compute_loss(y_pred, y_true):
    """Mean squared error over all elements."""
    return tf.reduce_mean(tf.square(y_pred - y_true))

# Synthetic batch of 32 samples.
x = tf.random.normal([32, 10])
y_true = tf.random.normal([32, 5])

# One training step: forward pass under the tape...
with tf.GradientTape() as tape:
    y_pred = model(x)
    loss = compute_loss(y_pred, y_true)

# ...then backward pass and a manual SGD update of both parameters.
gradients = tape.gradient(loss, [weights, bias])
weights.assign_sub(learning_rate * gradients[0])
bias.assign_sub(learning_rate * gradients[1])

print(f"损失: {loss.numpy()}")
print(f"权重梯度范数: {tf.norm(gradients[0]).numpy()}")
停止梯度 #
python
import tensorflow as tf

params = tf.Variable([1.0, 2.0, 3.0])

# stop_gradient cuts the backward path entirely, so the tape finds no
# route from z back to params and the gradient comes back as None.
with tf.GradientTape() as tape:
    y = params * 2
    y_stopped = tf.stop_gradient(y)
    z = y_stopped ** 2
grad = tape.gradient(z, params)
print(f"梯度(被停止): {grad}")

# When only one branch is stopped, the other branch still contributes,
# so a partial gradient flows back.
with tf.GradientTape() as tape:
    y = params * 2
    z = tf.stop_gradient(y) ** 2 + y ** 2
grad = tape.gradient(z, params)
print(f"部分梯度: {grad.numpy()}")
变量作用域 #
tf.Module #
python
import tensorflow as tf

class MyModel(tf.Module):
    """Minimal affine model; tf.Module auto-tracks its Variables."""

    def __init__(self, name=None):
        super().__init__(name=name)
        # Weight and bias become entries in .variables automatically.
        self.w = tf.Variable(tf.random.normal([3, 2]), name='w')
        self.b = tf.Variable(tf.zeros([2]), name='b')

    def __call__(self, x):
        return tf.matmul(x, self.w) + self.b

model = MyModel(name='my_model')

# tf.Module collects every Variable attribute, recursively.
print(f"变量列表: {model.variables}")
print(f"可训练变量: {model.trainable_variables}")

# Run a forward pass on a single sample.
x = tf.constant([[1.0, 2.0, 3.0]])
output = model(x)
print(f"输出: {output.numpy()}")
Keras Layer #
python
import tensorflow as tf

class DenseLayer(tf.keras.layers.Layer):
    """Hand-rolled dense layer showing deferred weight creation."""

    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        # build() runs on first call, once the input feature size is known;
        # add_weight registers the variables with the layer.
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='glorot_uniform',
            trainable=True,
            name='kernel'
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='bias'
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

layer = DenseLayer(4)
x = tf.random.normal([2, 3])
output = layer(x)  # first call triggers build()

print(f"层变量: {layer.variables}")
print(f"权重: {layer.weights}")
print(f"可训练变量: {layer.trainable_variables}")
变量持久化 #
Checkpoint #
python
import tensorflow as tf
import os

# Two variables to persist.
first = tf.Variable(tf.random.normal([3, 3]), name='v1')
second = tf.Variable(tf.random.normal([2, 2]), name='v2')

# The keyword names (v1, v2) become the keys under which the
# values are stored in the checkpoint.
checkpoint = tf.train.Checkpoint(v1=first, v2=second)
manager = tf.train.CheckpointManager(
    checkpoint,
    directory='./checkpoints',
    max_to_keep=3  # older checkpoints beyond 3 are deleted
)

# Write a checkpoint to disk.
path = manager.save()
print(f"保存到: {path}")

# Overwrite the variable to prove the restore works.
first.assign(tf.zeros([3, 3]))
print(f"修改后 v1:\n{first.numpy()}")

# restore() copies the saved values back into the live variables.
checkpoint.restore(manager.latest_checkpoint)
print(f"恢复后 v1:\n{first.numpy()}")
SavedModel #
python
import tensorflow as tf
import os

# Build a small two-layer model. Using tf.keras.Input instead of the
# deprecated input_shape= argument keeps this working under Keras 3.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(5,)),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Dense(2)
])

# compile() only attaches the optimizer and loss; actual training would
# call model.fit(...) here.
x = tf.random.normal([32, 5])
model.compile(optimizer='adam', loss='mse')

# Save the whole model. Keras 3 (TF >= 2.16) requires the `.keras`
# extension for its native format -- a bare directory path raises
# ValueError there.
model.save('./saved_model.keras')

# Reload and verify: with identical weights the outputs match exactly,
# so elementwise float equality is safe in this specific check.
loaded_model = tf.keras.models.load_model('./saved_model.keras')
output1 = model(x)
output2 = loaded_model(x)
print(f"输出一致: {tf.reduce_all(output1 == output2).numpy()}")
变量共享 #
python
import tensorflow as tf

class SharedLayer(tf.keras.layers.Layer):
    """Layer that can either own its kernel or reuse one passed in."""

    def __init__(self, shared_weights=None):
        super().__init__()
        # Reuse the caller-supplied Variable when given; otherwise
        # create a private 4x4 kernel.
        if shared_weights is not None:
            self.w = shared_weights
        else:
            self.w = self.add_weight('kernel', shape=[4, 4])

    def call(self, x):
        return tf.matmul(x, self.w)

# One Variable handed to two layers -- both hold the same object,
# so updating it through either layer affects both.
shared_w = tf.Variable(tf.random.normal([4, 4]))
layer1 = SharedLayer(shared_w)
layer2 = SharedLayer(shared_w)

x = tf.random.normal([2, 4])
print(f"layer1 输出: {layer1(x).numpy()}")
print(f"layer2 输出: {layer2(x).numpy()}")
print(f"共享同一权重: {layer1.w is layer2.w}")
变量约束 #
python
import tensorflow as tf

# The constraint attribute is NOT applied automatically by plain assign;
# Keras optimizers apply it after their updates, so here we call it by hand.
constraint = tf.keras.constraints.NonNeg()  # projects values to >= 0
v = tf.Variable(tf.random.normal([3, 3]), constraint=constraint)

v.assign(v - 1.0)        # the shift can push entries below zero
v.assign(constraint(v))  # manually project back into the valid set
print(f"约束后:\n{v.numpy()}")

# A few commonly used constraint classes.
constraints = {
    'nonneg': tf.keras.constraints.NonNeg(),
    'unit_norm': tf.keras.constraints.UnitNorm(axis=0),
    'max_norm': tf.keras.constraints.MaxNorm(max_value=2.0),
    'min_max_norm': tf.keras.constraints.MinMaxNorm(
        min_value=0.0, max_value=1.0, rate=0.5
    )
}

# Layers accept constraints directly; the optimizer applies them
# after each weight update.
layer = tf.keras.layers.Dense(
    10,
    kernel_constraint=tf.keras.constraints.MaxNorm(2.0)
)
下一步 #
现在你已经掌握了变量与状态管理,接下来学习 Keras API,开始构建神经网络模型!
最后更新:2026-04-04