快速开始 #

环境准备 #

硬件要求 #

text
最低配置(QLoRA 微调 7B 模型):
├── GPU:8GB+ 显存
├── 内存:16GB+
├── 存储:50GB+
└── 示例:RTX 3090、RTX 4090

推荐配置(LoRA 微调 7B 模型):
├── GPU:24GB+ 显存
├── 内存:32GB+
├── 存储:100GB+
└── 示例:A10、A100、H100

全量微调配置:
├── GPU:80GB+ 显存
├── 多卡并行
├── 存储:500GB+
└── 示例:A100 80GB × 8

软件环境 #

bash
# 创建虚拟环境
conda create -n finetune python=3.10
conda activate finetune

# 安装 PyTorch
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# 安装核心库
pip install transformers datasets peft accelerate bitsandbytes

# 安装工具库
pip install wandb tensorboard mlflow

验证环境 #

python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Sanity-check the environment: report library versions and CUDA status.
print(f"PyTorch 版本: {torch.__version__}")
print(f"CUDA 可用: {torch.cuda.is_available()}")
print(f"CUDA 版本: {torch.version.cuda}")
print(f"GPU 数量: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    # Only query device properties when a GPU is actually present.
    print(f"GPU 名称: {torch.cuda.get_device_name(0)}")
    print(f"GPU 显存: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

第一个微调实验 #

任务说明 #

text
任务:情感分析微调
├── 目标:让模型识别文本情感(正面/负面)
├── 模型:Qwen/Qwen2-0.5B
├── 方法:LoRA
└── 数据:IMDB 电影评论数据集

步骤 1:准备数据 #

python
from datasets import load_dataset

# Download the IMDB movie-review dataset (binary sentiment labels).
dataset = load_dataset("imdb")

print(f"训练集大小: {len(dataset['train'])}")
print(f"测试集大小: {len(dataset['test'])}")

# Peek at one sample; 'label' is 1 for positive, otherwise negative.
print("\n数据示例:")
print(f"文本: {dataset['train'][0]['text'][:100]}...")
print(f"标签: {dataset['train'][0]['label']}")

步骤 2:数据预处理 #

python
from transformers import AutoTokenizer

model_name = "Qwen/Qwen2-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Causal-LM tokenizers often ship without a pad token; reuse EOS for padding.
    tokenizer.pad_token = tokenizer.eos_token

def preprocess_function(examples):
    """Build aligned input/label sequences for causal-LM fine-tuning.

    For a causal LM, `labels` must be token-for-token aligned with
    `input_ids`. (The previous version tokenized the short answer
    separately with max_length=10, producing 10-token labels for
    512-token inputs, which breaks the loss computation.) Here the
    prompt and answer are concatenated into one sequence, and padding
    positions are masked with -100 so they are ignored by the loss.
    """
    prompts = [
        f"分析以下电影评论的情感,回答'正面'或'负面':\n评论:{text}\n情感:"
        for text in examples['text']
    ]

    answers = ['正面' if label == 1 else '负面' for label in examples['label']]

    # Train on the full "prompt + answer" text.
    full_texts = [p + a for p, a in zip(prompts, answers)]

    model_inputs = tokenizer(
        full_texts,
        max_length=512,
        truncation=True,
        padding='max_length'
    )

    # Copy input_ids as labels, masking padding (attention_mask == 0) with -100.
    model_inputs['labels'] = [
        [tok if mask == 1 else -100 for tok, mask in zip(ids, attn)]
        for ids, attn in zip(model_inputs['input_ids'], model_inputs['attention_mask'])
    ]
    return model_inputs

tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset['train'].column_names
)

步骤 3:加载模型 #

python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType
import torch

# Load the base model in half precision; device_map="auto" handles placement.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

# LoRA: train small rank-16 adapters on the attention projections only.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,                # adapter rank
    lora_alpha=32,       # scaling factor (alpha / r = 2)
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    bias="none"          # leave bias terms frozen
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # shows the small trainable fraction

步骤 4:配置训练 #

python
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,   # effective batch size = 4 * 4 = 16
    learning_rate=2e-4,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",           # must match eval_strategy for best-model loading
    load_best_model_at_end=True,
    logging_steps=10,
    fp16=True,                       # mixed-precision training
    gradient_checkpointing=True,     # trade compute for less memory
    optim="adamw_torch",
    report_to="none"                 # disable experiment-tracker reporting
)

# Small subsets (1000 train / 200 eval) keep this demo fast.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'].select(range(1000)),
    eval_dataset=tokenized_dataset['test'].select(range(200)),
    tokenizer=tokenizer,
)

步骤 5:开始训练 #

python
# Run fine-tuning, then persist the adapter weights and tokenizer together.
trainer.train()

trainer.save_model("./fine-tuned-model")
tokenizer.save_pretrained("./fine-tuned-model")

步骤 6:测试模型 #

python
from peft import PeftModel

# Reload the frozen base model, then attach the trained LoRA adapter on top.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

fine_tuned_model = PeftModel.from_pretrained(base_model, "./fine-tuned-model")

def predict_sentiment(text):
    """Classify one review; returns the model's answer ('正面' or '负面').

    Wraps the review in the same prompt template used during training and
    parses the generated text after the final '情感:' marker.
    """
    prompt = f"分析以下电影评论的情感,回答'正面'或'负面':\n评论:{text}\n情感:"
    inputs = tokenizer(prompt, return_tensors="pt").to(fine_tuned_model.device)

    with torch.no_grad():  # inference only — no gradients needed
        outputs = fine_tuned_model.generate(
            **inputs,
            max_new_tokens=10,
            temperature=0.1,   # near-greedy sampling
            do_sample=True,
            # Pass an explicit pad id (matching the inference script below)
            # to avoid the open-ended generation warning.
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("情感:")[-1].strip()

# A few unseen English reviews to spot-check the fine-tuned model.
test_texts = [
    "This movie is absolutely amazing! I loved every minute of it.",
    "Terrible film. Waste of time and money.",
    "The acting was okay, but the plot was confusing."
]

for text in test_texts:
    sentiment = predict_sentiment(text)
    print(f"评论: {text}")
    print(f"情感: {sentiment}\n")

完整代码示例 #

训练脚本 #

python
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForSeq2Seq
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType

def main():
    """End-to-end LoRA fine-tuning of Qwen2-0.5B for IMDB sentiment analysis.

    Pipeline: load data -> tokenize (prompt + answer, with prompt and
    padding tokens masked from the loss) -> wrap the model with LoRA
    adapters -> train on small subsets -> save adapter and tokenizer.
    """
    model_name = "Qwen/Qwen2-0.5B"
    output_dir = "./fine-tuned-model"
    
    print("加载数据集...")
    dataset = load_dataset("imdb")
    
    print("加载 tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    if tokenizer.pad_token is None:
        # Causal-LM tokenizers often lack a pad token; reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token
    
    def preprocess_function(examples):
        # Build "prompt + answer" training texts; only answer tokens
        # contribute to the loss (prompt and padding are masked with -100).
        prompts = [
            f"分析以下电影评论的情感,回答'正面'或'负面':\n评论:{text}\n情感:"
            for text in examples['text']
        ]
        
        labels = ['正面' if label == 1 else '负面' for label in examples['label']]
        
        full_texts = [p + l for p, l in zip(prompts, labels)]
        
        model_inputs = tokenizer(
            full_texts,
            max_length=512,
            truncation=True,
            padding='max_length',
            return_tensors='pt'
        )
        
        # Start the labels as a copy of input_ids, masking padding positions.
        labels = model_inputs['input_ids'].clone()
        labels[labels == tokenizer.pad_token_id] = -100
        
        # Mask the prompt span so the model is trained only on the answer.
        # NOTE(review): for prompts longer than max_length this masks the whole
        # truncated sequence, so such samples contribute nothing to the loss —
        # confirm this is acceptable for the dataset's longest reviews.
        prompt_lengths = [len(tokenizer.encode(p, add_special_tokens=False)) for p in prompts]
        for i, prompt_len in enumerate(prompt_lengths):
            labels[i, :prompt_len] = -100
        
        model_inputs['labels'] = labels
        return model_inputs
    
    print("预处理数据...")
    tokenized_dataset = dataset.map(
        preprocess_function,
        batched=True,
        remove_columns=dataset['train'].column_names
    )
    
    print("加载模型...")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,   # half precision to save memory
        device_map="auto",
        trust_remote_code=True
    )
    
    print("配置 LoRA...")
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=16,                        # adapter rank
        lora_alpha=32,               # scaling factor (alpha / r = 2)
        lora_dropout=0.1,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        bias="none"
    )
    
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()
    
    print("配置训练参数...")
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=4,   # effective batch size = 16
        learning_rate=2e-4,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",           # must match eval_strategy for best-model loading
        load_best_model_at_end=True,
        logging_steps=10,
        fp16=True,
        gradient_checkpointing=True,
        optim="adamw_torch",
        report_to="none",
        save_total_limit=2               # keep at most two checkpoints on disk
    )
    
    # Dynamic padding collator; inputs are already max_length-padded, so this
    # mainly ensures labels are batched consistently.
    data_collator = DataCollatorForSeq2Seq(
        tokenizer=tokenizer,
        model=model,
        padding=True
    )
    
    print("初始化 Trainer...")
    # Small subsets (1000 train / 200 eval) keep the demo run short.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset['train'].select(range(1000)),
        eval_dataset=tokenized_dataset['test'].select(range(200)),
        tokenizer=tokenizer,
        data_collator=data_collator
    )
    
    print("开始训练...")
    trainer.train()
    
    print("保存模型...")
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)
    
    print("训练完成!")

if __name__ == "__main__":
    main()

推理脚本 #

python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

def load_fine_tuned_model(base_model_name, lora_model_path):
    """Load the frozen base model, attach the saved LoRA adapter, and
    return the resulting model together with its tokenizer."""
    print("加载基础模型...")
    backbone = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

    print("加载 LoRA 权重...")
    peft_model = PeftModel.from_pretrained(backbone, lora_model_path)

    print("加载 tokenizer...")
    # The tokenizer was saved alongside the adapter, so load it from there.
    tok = AutoTokenizer.from_pretrained(lora_model_path, trust_remote_code=True)

    return peft_model, tok

def predict_sentiment(model, tokenizer, text):
    """Return the model's sentiment answer ('正面'/'负面') for one review."""
    prompt = f"分析以下电影评论的情感,回答'正面'或'负面':\n评论:{text}\n情感:"
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Short continuation; near-zero temperature keeps sampling almost greedy.
    generation_kwargs = dict(
        max_new_tokens=10,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id,
    )
    with torch.no_grad():
        generated = model.generate(**encoded, **generation_kwargs)

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Everything after the last '情感:' marker is the model's answer.
    answer = decoded.split("情感:")[-1]
    return answer.strip()

def main():
    """Load the fine-tuned model and print predictions for sample reviews."""
    base_model_name = "Qwen/Qwen2-0.5B"
    lora_model_path = "./fine-tuned-model"

    model, tokenizer = load_fine_tuned_model(base_model_name, lora_model_path)

    test_texts = [
        "This movie is absolutely amazing! I loved every minute of it.",
        "Terrible film. Waste of time and money.",
        "The acting was okay, but the plot was confusing.",
        "One of the best movies I've seen this year!",
        "I fell asleep halfway through. Very boring.",
    ]

    print("\n测试结果:")
    print("-" * 80)
    for text in test_texts:
        print(f"评论: {text}")
        print(f"情感: {predict_sentiment(model, tokenizer, text)}")
        print("-" * 80)

if __name__ == "__main__":
    main()

常见问题 #

显存不足 #

text
问题:CUDA out of memory

解决方案:
1. 减小批次大小
   per_device_train_batch_size=1

2. 增加梯度累积
   gradient_accumulation_steps=16

3. 使用 QLoRA
   from transformers import BitsAndBytesConfig
   bnb_config = BitsAndBytesConfig(
       load_in_4bit=True,
       bnb_4bit_compute_dtype=torch.float16
   )

4. 启用梯度检查点
   gradient_checkpointing=True

训练不稳定 #

text
问题:Loss 不下降或震荡

解决方案:
1. 降低学习率
   learning_rate=1e-5

2. 增加预热步数
   warmup_steps=100

3. 使用学习率调度器
   lr_scheduler_type="cosine"

4. 检查数据质量
   - 确保标签正确
   - 检查数据分布

模型不收敛 #

text
问题:模型性能没有提升

解决方案:
1. 检查数据量
   - 数据太少:增加数据
   - 数据太多:可能过拟合

2. 调整 LoRA 参数
   - 增加 rank: r=32
   - 调整 alpha: lora_alpha=64

3. 调整训练轮数
   - 太少:增加 epochs
   - 太多:减少 epochs

4. 检查任务难度
   - 任务太难:简化任务
   - 任务太简单:增加复杂度

下一步 #

恭喜你完成了第一个微调实验!接下来可以:

  1. 学习 数据准备,掌握数据处理技巧
  2. 学习 模型选择,选择合适的基座模型
  3. 学习 训练配置,深入理解训练参数
最后更新:2026-04-05