快速开始 #
环境准备 #
硬件要求 #
text
最低配置(QLoRA 微调 7B 模型):
├── GPU:8GB+ 显存
├── 内存:16GB+
├── 存储:50GB+
└── 示例:RTX 3090、RTX 4090
推荐配置(LoRA 微调 7B 模型):
├── GPU:24GB+ 显存
├── 内存:32GB+
├── 存储:100GB+
└── 示例:A10、A100、H100
全量微调配置:
├── GPU:80GB+ 显存
├── 多卡并行
├── 存储:500GB+
└── 示例:A100 80GB × 8
软件环境 #
bash
# Create an isolated conda environment for fine-tuning
conda create -n finetune python=3.10
conda activate finetune

# Install PyTorch (CUDA 11.8 wheel index)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Core fine-tuning libraries
pip install transformers datasets peft accelerate bitsandbytes

# Experiment-tracking tools
pip install wandb tensorboard mlflow
验证环境 #
python
# Verify the environment: PyTorch build, CUDA availability, and visible GPUs.
import torch
# Importing transformers here also confirms it installed correctly.
from transformers import AutoModelForCausalLM, AutoTokenizer

print(f"PyTorch 版本: {torch.__version__}")
print(f"CUDA 可用: {torch.cuda.is_available()}")
print(f"CUDA 版本: {torch.version.cuda}")
print(f"GPU 数量: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    # Query device properties only when a GPU is actually present.
    print(f"GPU 名称: {torch.cuda.get_device_name(0)}")
    print(f"GPU 显存: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
第一个微调实验 #
任务说明 #
text
任务:情感分析微调
├── 目标:让模型识别文本情感(正面/负面)
├── 模型:Qwen/Qwen2-0.5B
├── 方法:LoRA
└── 数据:IMDB 电影评论数据集
步骤 1:准备数据 #
python
# Load the IMDB movie-review dataset used for the sentiment fine-tuning demo.
from datasets import load_dataset
dataset = load_dataset("imdb")
# Sanity-check split sizes and peek at one raw example before preprocessing.
print(f"训练集大小: {len(dataset['train'])}")
print(f"测试集大小: {len(dataset['test'])}")
print("\n数据示例:")
# Show only the first 100 characters of the review text.
print(f"文本: {dataset['train'][0]['text'][:100]}...")
# label is an integer class id (this tutorial maps 1 -> positive, else negative).
print(f"标签: {dataset['train'][0]['label']}")
步骤 2:数据预处理 #
python
from transformers import AutoTokenizer

model_name = "Qwen/Qwen2-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Some tokenizers ship without a pad token; reuse EOS so padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def preprocess_function(examples):
    """Build prompt+answer sequences for causal-LM fine-tuning.

    The prompt and the gold answer are tokenized together so ``labels`` is
    aligned token-for-token with ``input_ids`` (tokenizing the answers
    separately would produce a length-mismatched label tensor).  Loss is
    computed only on the answer: padding and prompt positions are masked
    with -100.
    """
    prompts = [
        f"分析以下电影评论的情感,回答'正面'或'负面':\n评论:{text}\n情感:"
        for text in examples['text']
    ]
    answers = ['正面' if label == 1 else '负面' for label in examples['label']]

    model_inputs = tokenizer(
        [p + a for p, a in zip(prompts, answers)],
        max_length=512,
        truncation=True,
        padding='max_length',
        return_tensors='pt'
    )
    labels = model_inputs['input_ids'].clone()
    # Ignore padding in the loss.
    labels[labels == tokenizer.pad_token_id] = -100
    # Ignore the prompt tokens in the loss as well.
    for i, prompt in enumerate(prompts):
        prompt_len = len(tokenizer.encode(prompt, add_special_tokens=False))
        labels[i, :prompt_len] = -100
    model_inputs['labels'] = labels
    return model_inputs

tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset['train'].column_names
)
步骤 3:加载模型 #
python
# Load the base model in fp16 and wrap it with a LoRA adapter (PEFT).
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType
import torch
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16,
device_map="auto",  # let accelerate place layers on available devices
trust_remote_code=True
)
lora_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
r=16,  # LoRA rank
lora_alpha=32,  # LoRA scaling factor
lora_dropout=0.1,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # attention projections
bias="none"
)
model = get_peft_model(model, lora_config)
# Report how few parameters are actually trainable under LoRA.
model.print_trainable_parameters()
步骤 4:配置训练 #
python
# Configure and build the Trainer for the LoRA fine-tuning run.
from transformers import TrainingArguments, Trainer
training_args = TrainingArguments(
output_dir="./results",
num_train_epochs=3,
per_device_train_batch_size=4,
per_device_eval_batch_size=4,
gradient_accumulation_steps=4,  # effective batch size = 4 * 4 per device
learning_rate=2e-4,
weight_decay=0.01,
eval_strategy="epoch",  # evaluate once per epoch
save_strategy="epoch",  # must match eval_strategy for load_best_model_at_end
load_best_model_at_end=True,
logging_steps=10,
fp16=True,  # mixed-precision training
gradient_checkpointing=True,  # trade compute for lower activation memory
optim="adamw_torch",
report_to="none"  # disable wandb/tensorboard reporting for this demo
)
trainer = Trainer(
model=model,
args=training_args,
# Use small subsets so the demo finishes quickly.
train_dataset=tokenized_dataset['train'].select(range(1000)),
eval_dataset=tokenized_dataset['test'].select(range(200)),
tokenizer=tokenizer,
)
步骤 5:开始训练 #
python
# Run training, then persist the adapter weights and tokenizer together
# so the inference script can load both from one directory.
trainer.train()
trainer.save_model("./fine-tuned-model")
tokenizer.save_pretrained("./fine-tuned-model")
步骤 6:测试模型 #
python
from peft import PeftModel

# Reload the frozen base model, then attach the trained LoRA weights.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
fine_tuned_model = PeftModel.from_pretrained(base_model, "./fine-tuned-model")

def predict_sentiment(text):
    """Classify one review; return the model's answer after '情感:'."""
    prompt = f"分析以下电影评论的情感,回答'正面'或'负面':\n评论:{text}\n情感:"
    inputs = tokenizer(prompt, return_tensors="pt").to(fine_tuned_model.device)
    with torch.no_grad():
        outputs = fine_tuned_model.generate(
            **inputs,
            max_new_tokens=10,
            temperature=0.1,  # low temperature: near-greedy sampling
            do_sample=True
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text contains the prompt; keep only what follows the cue.
    return response.split("情感:")[-1].strip()

test_texts = [
    "This movie is absolutely amazing! I loved every minute of it.",
    "Terrible film. Waste of time and money.",
    "The acting was okay, but the plot was confusing."
]
for text in test_texts:
    sentiment = predict_sentiment(text)
    print(f"评论: {text}")
    print(f"情感: {sentiment}\n")
完整代码示例 #
训练脚本 #
python
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForSeq2Seq
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType


def main():
    """End-to-end LoRA fine-tuning of Qwen2-0.5B on IMDB sentiment."""
    model_name = "Qwen/Qwen2-0.5B"
    output_dir = "./fine-tuned-model"

    print("加载数据集...")
    dataset = load_dataset("imdb")

    print("加载 tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    if tokenizer.pad_token is None:
        # Some tokenizers ship without a pad token; reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token

    def preprocess_function(examples):
        # Prompt + gold answer concatenated into one causal-LM sequence.
        prompts = [
            f"分析以下电影评论的情感,回答'正面'或'负面':\n评论:{text}\n情感:"
            for text in examples['text']
        ]
        answers = ['正面' if label == 1 else '负面' for label in examples['label']]
        full_texts = [p + a for p, a in zip(prompts, answers)]
        model_inputs = tokenizer(
            full_texts,
            max_length=512,
            truncation=True,
            padding='max_length',
            return_tensors='pt'
        )
        # Loss only on the answer: mask padding and prompt tokens with -100.
        labels = model_inputs['input_ids'].clone()
        labels[labels == tokenizer.pad_token_id] = -100
        prompt_lengths = [len(tokenizer.encode(p, add_special_tokens=False)) for p in prompts]
        for i, prompt_len in enumerate(prompt_lengths):
            labels[i, :prompt_len] = -100
        model_inputs['labels'] = labels
        return model_inputs

    print("预处理数据...")
    tokenized_dataset = dataset.map(
        preprocess_function,
        batched=True,
        remove_columns=dataset['train'].column_names
    )

    print("加载模型...")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

    print("配置 LoRA...")
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=16,
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        bias="none"
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    print("配置训练参数...")
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",  # must match eval_strategy for load_best_model_at_end
        load_best_model_at_end=True,
        logging_steps=10,
        fp16=True,
        gradient_checkpointing=True,
        optim="adamw_torch",
        report_to="none",
        save_total_limit=2  # keep only the two most recent checkpoints
    )

    data_collator = DataCollatorForSeq2Seq(
        tokenizer=tokenizer,
        model=model,
        padding=True
    )

    print("初始化 Trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        # Small subsets so the demo run finishes quickly.
        train_dataset=tokenized_dataset['train'].select(range(1000)),
        eval_dataset=tokenized_dataset['test'].select(range(200)),
        tokenizer=tokenizer,
        data_collator=data_collator
    )

    print("开始训练...")
    trainer.train()

    print("保存模型...")
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)
    print("训练完成!")


if __name__ == "__main__":
    main()
推理脚本 #
python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel


def load_fine_tuned_model(base_model_name, lora_model_path):
    """Load the frozen base model and attach the saved LoRA adapter.

    Returns a ``(model, tokenizer)`` pair ready for inference.
    """
    print("加载基础模型...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    print("加载 LoRA 权重...")
    model = PeftModel.from_pretrained(base_model, lora_model_path)
    print("加载 tokenizer...")
    # The tokenizer was saved alongside the adapter by the training script.
    tokenizer = AutoTokenizer.from_pretrained(lora_model_path, trust_remote_code=True)
    return model, tokenizer


def predict_sentiment(model, tokenizer, text):
    """Classify one review; return the model's answer after '情感:'."""
    prompt = f"分析以下电影评论的情感,回答'正面'或'负面':\n评论:{text}\n情感:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            temperature=0.1,  # low temperature: near-greedy sampling
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text contains the prompt; keep only what follows the cue.
    return response.split("情感:")[-1].strip()


def main():
    base_model_name = "Qwen/Qwen2-0.5B"
    lora_model_path = "./fine-tuned-model"
    model, tokenizer = load_fine_tuned_model(base_model_name, lora_model_path)
    test_texts = [
        "This movie is absolutely amazing! I loved every minute of it.",
        "Terrible film. Waste of time and money.",
        "The acting was okay, but the plot was confusing.",
        "One of the best movies I've seen this year!",
        "I fell asleep halfway through. Very boring."
    ]
    print("\n测试结果:")
    print("-" * 80)
    for text in test_texts:
        sentiment = predict_sentiment(model, tokenizer, text)
        print(f"评论: {text}")
        print(f"情感: {sentiment}")
        print("-" * 80)


if __name__ == "__main__":
    main()
常见问题 #
显存不足 #
text
问题:CUDA out of memory
解决方案:
1. 减小批次大小
per_device_train_batch_size=1
2. 增加梯度累积
gradient_accumulation_steps=16
3. 使用 QLoRA
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16
)
4. 启用梯度检查点
gradient_checkpointing=True
训练不稳定 #
text
问题:Loss 不下降或震荡
解决方案:
1. 降低学习率
learning_rate=1e-5
2. 增加预热步数
warmup_steps=100
3. 使用学习率调度器
lr_scheduler_type="cosine"
4. 检查数据质量
- 确保标签正确
- 检查数据分布
模型不收敛 #
text
问题:模型性能没有提升
解决方案:
1. 检查数据量
- 数据太少:增加数据
- 数据太多:可能过拟合
2. 调整 LoRA 参数
- 增加 rank: r=32
- 调整 alpha: lora_alpha=64
3. 调整训练轮数
- 太少:增加 epochs
- 太多:减少 epochs
4. 检查任务难度
- 任务太难:简化任务
- 任务太简单:增加复杂度
下一步 #
恭喜你完成了第一个微调实验!接下来可以在此基础上继续探索更大的模型、其他微调方法(如 QLoRA、全量微调)以及你自己的任务数据。
最后更新:2026-04-05