RNN 序列模型实战 #
RNN 概述 #
循环神经网络(RNN)专门用于处理序列数据,能够捕捉序列中的时序依赖关系。
RNN 类型 #
text
┌─────────────────────────────────────────────────────────────┐
│ RNN 类型 │
├─────────────────────────────────────────────────────────────┤
│ │
│ SimpleRNN │
│ ├── 基本循环神经网络 │
│ └── 存在梯度消失问题 │
│ │
│ LSTM │
│ ├── 长短期记忆网络 │
│ ├── 门控机制 │
│ └── 适合长序列 │
│ │
│ GRU │
│ ├── 门控循环单元 │
│ ├── 参数更少 │
│ └── 训练更快 │
│ │
│ Bidirectional │
│ ├── 双向 RNN │
│ └── 同时考虑前后文 │
│ │
└─────────────────────────────────────────────────────────────┘
文本分类 #
数据准备 #
python
import tensorflow as tf
import numpy as np

# Load the IMDB sentiment dataset, keeping only the 10,000 most frequent
# words; each review arrives as a list of integer word indices.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=10000)

# Pad (or truncate) every review to a fixed length so they can be batched.
maxlen = 200
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)

print(f"训练集形状: {x_train.shape}")
print(f"测试集形状: {x_test.shape}")
# Binary labels: bincount shows how many negative (0) / positive (1) reviews.
print(f"标签分布: {np.bincount(y_train)}")
LSTM 模型 #
python
# Binary sentiment classifier: Embedding -> single LSTM -> dense head.
import tensorflow as tf

model = tf.keras.Sequential([
    # Map each of the 10,000 word indices to a 128-dim vector.
    tf.keras.layers.Embedding(10000, 128, input_length=maxlen),
    # NOTE(review): recurrent_dropout > 0 disables the fused cuDNN LSTM
    # kernel, so GPU training falls back to a slower generic implementation
    # — acceptable for a tutorial, but worth knowing.
    tf.keras.layers.LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # Single sigmoid unit: probability of the positive class.
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
model.summary()

# Hold out 20% of the training data for validation.
history = model.fit(
    x_train, y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2
)

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"测试准确率: {test_acc:.4f}")
双向 LSTM #
python
# Stacked bidirectional LSTMs: each layer reads the sequence forwards and
# backwards, so every position sees both left and right context.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=maxlen),
    # return_sequences=True so the second recurrent layer receives the
    # full sequence of hidden states, not just the final one.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    x_train, y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2
)
GRU 模型 #
python
# GRU variant of the classifier: fewer parameters than LSTM, typically
# faster to train with comparable accuracy on this task.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 128, input_length=maxlen),
    # NOTE(review): recurrent_dropout > 0 disables the fused cuDNN GRU
    # kernel, falling back to the slower generic implementation.
    tf.keras.layers.GRU(64, dropout=0.2, recurrent_dropout=0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    x_train, y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2
)
时间序列预测 #
数据准备 #
python
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
def generate_time_series(n_steps):
    """Generate one synthetic 1-D time series of length ``n_steps``.

    The series is the sum of two sine waves with random frequency and
    phase, plus uniform noise, so every call yields a different curve.

    Args:
        n_steps: number of time steps in the series.

    Returns:
        np.ndarray of shape (n_steps,) with values in roughly [-0.9, 0.9].
    """
    # Four random scalars (each an array of shape (1,)) controlling the waves.
    freq1, freq2, offsets1, offsets2 = np.random.rand(4, 1)
    time = np.linspace(0, 1, n_steps)
    series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))
    series += 0.3 * np.sin((time - offsets2) * (freq2 * 20 + 20))
    series += 0.1 * (np.random.rand(n_steps) - 0.5)
    # BUG FIX: the original returned series[0] — a scalar, since every
    # operand broadcasts to shape (n_steps,). Stacking scalars made the
    # dataset 1-D and broke the series[:, :n_steps] slicing downstream.
    # Return the full (n_steps,) series instead.
    return series
# Build 1000 series of length 60: the first 50 steps form the model input,
# the last 10 steps are the prediction target.
n_samples = 1000
n_steps = 50
series = np.array([generate_time_series(n_steps + 10) for _ in range(n_samples)])
x_train = series[:, :n_steps]
y_train = series[:, n_steps:]
# Add a trailing channel axis: Keras RNN layers expect (batch, time, features).
x_train = x_train[..., np.newaxis]
y_train = y_train[..., np.newaxis]
print(f"训练集形状: {x_train.shape}")
print(f"标签形状: {y_train.shape}")
序列预测模型 #
python
# Sequence-to-vector forecaster: read the 50 input steps, then emit all
# 10 future steps at once from the final LSTM state.
import tensorflow as tf

model = tf.keras.Sequential([
    # input_shape=[None, 1]: any number of time steps, one feature each.
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=[None, 1]),
    tf.keras.layers.LSTM(32),
    # 10 linear outputs — one per forecast step.
    tf.keras.layers.Dense(10)
])

model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

# NOTE(review): y_train is (batch, 10, 1) while the model outputs
# (batch, 10); Keras squeezes the trailing singleton dimension when
# computing the loss, but aligning the shapes explicitly (e.g. a final
# Reshape layer) would be clearer — confirm against the Keras version used.
history = model.fit(
    x_train, y_train,
    epochs=20,
    validation_split=0.2
)
自回归预测 #
python
# Sequence-to-sequence model for one-step-ahead prediction: with
# return_sequences=True, the Dense(1) head is applied at every time step,
# so the output shape matches the input shape (batch, time, 1).
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=[None, 1]),
    tf.keras.layers.Dense(1)
])

model.compile(
    optimizer='adam',
    loss='mse'
)
def predict_sequence(model, x, n_steps):
    """Autoregressively roll the model forward for ``n_steps`` steps.

    Each iteration predicts one step ahead, records that step, and slides
    the input window forward by one so the prediction becomes an input.

    Args:
        model: sequence-to-sequence model producing (batch, time, 1) outputs.
        x: seed window, shape (batch, time, 1).
        n_steps: number of future steps to generate.

    Returns:
        Tensor of shape (batch, n_steps, 1) with the generated steps.
    """
    window = x
    generated = []
    for _ in range(n_steps):
        out = model.predict(window, verbose=0)
        last_step = out[:, -1:, :]
        generated.append(last_step)
        # Drop the oldest time step, append the newest prediction.
        window = tf.concat([window[:, 1:, :], last_step], axis=1)
    return tf.concat(generated, axis=1)
# Smoke test: generate 10 future steps for the first five training series.
# NOTE(review): this rebinds x_test, shadowing the IMDB test set defined
# earlier in the document.
x_test = x_train[:5]
predictions = predict_sequence(model, x_test, 10)
print(f"预测形状: {predictions.shape}")
序列标注 #
数据准备 #
python
import tensorflow as tf
import numpy as np

# Synthetic token-level tagging data: random token ids and random tags
# act as stand-ins for a real POS/NER corpus.
vocab_size = 10000
maxlen = 100  # NOTE(review): rebinds maxlen (was 200 in the IMDB section).
num_tags = 10
x_train = np.random.randint(0, vocab_size, size=(1000, maxlen))
y_train = np.random.randint(0, num_tags, size=(1000, maxlen))
print(f"输入形状: {x_train.shape}")
print(f"标签形状: {y_train.shape}")
序列标注模型 #
python
# Token-level tagger: BiLSTM over the sequence, then the same Dense
# softmax applied independently at every time step.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 128, input_length=maxlen),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    # TimeDistributed yields one num_tags-way prediction per token,
    # giving an output of shape (batch, maxlen, num_tags).
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(num_tags, activation='softmax'))
])

# sparse_categorical_crossentropy accepts integer tag ids directly —
# no need to one-hot the (batch, maxlen) targets.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2
)
文本生成 #
数据准备 #
python
# Character-level language-modeling data: slide a 20-character window over
# the corpus; each window predicts the single character that follows it.
import tensorflow as tf
import numpy as np

text = "This is a sample text for text generation. We will use this text to train a model."

# Character vocabulary plus the two lookup tables between chars and ids.
chars = sorted(set(text))
char2idx = {c: i for i, c in enumerate(chars)}
idx2char = {i: c for i, c in enumerate(chars)}

seq_length = 20
window_starts = range(len(text) - seq_length)
sequences = [[char2idx[c] for c in text[start:start + seq_length]]
             for start in window_starts]
next_chars = [char2idx[text[start + seq_length]] for start in window_starts]

x_train = np.array(sequences)
# One-hot targets to pair with categorical_crossentropy.
y_train = tf.keras.utils.to_categorical(next_chars, num_classes=len(chars))

print(f"训练样本数: {len(x_train)}")
print(f"字符表大小: {len(chars)}")
文本生成模型 #
python
import tensorflow as tf
model = tf.keras.Sequential([
tf.keras.layers.Embedding(len(chars), 128, input_length=seq_length),
tf.keras.layers.LSTM(256, return_sequences=True),
tf.keras.layers.LSTM(128),
tf.keras.layers.Dense(len(chars), activation='softmax')
])
model.compile(
optimizer='adam',
loss='categorical_crossentropy'
)
model.summary()
model.fit(x_train, y_train, batch_size=32, epochs=50)
生成文本 #
python
import tensorflow as tf
import numpy as np
def generate_text(model, start_string, char2idx, idx2char, length=100, temperature=1.0):
    """Sample ``length`` characters from a trained next-character model.

    Args:
        model: model mapping (1, time) int ids to (1, vocab) probabilities
            (its final recurrent layer does not return sequences).
        start_string: seed text; every character must exist in char2idx.
        char2idx: char -> integer id lookup.
        idx2char: integer id -> char lookup.
        length: number of characters to generate.
        temperature: sampling temperature; lower values make the output
            more conservative, higher values more random.

    Returns:
        start_string followed by the generated characters.
    """
    input_seq = tf.expand_dims([char2idx[c] for c in start_string], 0)
    generated = start_string
    for _ in range(length):
        # Model output is rank-2: (1, vocab). BUG FIX: the original sliced
        # predictions[:, -1, :], which fails on a rank-2 tensor.
        probs = model(input_seq)
        # BUG FIX: tf.random.categorical expects (unnormalized) log
        # probabilities, so convert the softmax output to logits before
        # applying temperature scaling.
        logits = tf.math.log(probs) / temperature
        # int() also avoids a dtype mismatch: the original concatenated an
        # int64 tensor built from the numpy id onto the int32 input_seq.
        predicted_id = int(tf.random.categorical(logits, num_samples=1)[0, 0])
        generated += idx2char[predicted_id]
        next_step = tf.constant([[predicted_id]], dtype=input_seq.dtype)
        # Slide the window: drop the oldest id, append the sampled one.
        input_seq = tf.concat([input_seq[:, 1:], next_step], axis=1)
    return generated
# Sample 100 characters; temperature 0.5 favors high-probability characters.
generated_text = generate_text(model, "This is", char2idx, idx2char, length=100, temperature=0.5)
print(generated_text)
下一步 #
现在你已经完成了 RNN 序列模型实战,接下来学习 NLP 文本处理,了解更高级的自然语言处理技术!
最后更新:2026-04-04