模型管理 #
MLflow Models 概述 #
MLflow Models 提供了一种统一的格式来打包机器学习模型,使其可以在各种下游工具中使用。
text
┌─────────────────────────────────────────────────────────────┐
│ MLflow Models 架构 │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ 训练框架 │ │
│ │ ├── Scikit-learn ├── PyTorch │ │
│ │ ├── TensorFlow ├── XGBoost │ │
│ │ ├── LightGBM ├── ONNX │ │
│ │ └── 更多... │ │
│ └─────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ MLflow Model Format │ │
│ │ ├── MLmodel (元数据) │ │
│ │ ├── 模型文件 │ │
│ │ ├── conda.yaml (环境) │ │
│ │ └── requirements.txt │ │
│ └─────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ 部署目标 │ │
│ │ ├── 本地推理 ├── Docker │ │
│ │ ├── REST API ├── Apache Spark │ │
│ │ ├── AWS SageMaker ├── Azure ML │ │
│ │ └── 更多... │ │
│ └─────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
模型格式 #
MLmodel 文件 #
text
MLmodel 文件示例:
─────────────────────────────────────────────────────────────
artifact_path: model
flavors:
python_function:
env: conda.yaml
loader_module: mlflow.sklearn
model_path: model.pkl
python_version: 3.10.0
sklearn:
code: null
pickled_model: model.pkl
serialization_format: pickle
sklearn_version: 1.3.0
mlflow_version: 2.10.0
model_size_bytes: 12345
model_uuid: abc123-def456
run_id: run123
utc_time_created: '2024-01-01 10:00:00.000000'
─────────────────────────────────────────────────────────────
模型目录结构 #
text
model/
├── MLmodel # 模型元数据
├── model.pkl # 模型文件
├── conda.yaml # Conda 环境配置
├── requirements.txt # Python 依赖
└── python_env.yaml # Python 环境详情
Flavors(模型格式) #
什么是 Flavor? #
text
┌─────────────────────────────────────────────────────────────┐
│ Flavor 概念 │
├─────────────────────────────────────────────────────────────┤
│ │
│ Flavor 是 MLflow 模型的"方言",定义了: │
│ │
│ ├── 如何保存模型 │
│ ├── 如何加载模型 │
│ └── 如何运行推理 │
│ │
│ 内置 Flavors: │
│ ├── python_function (通用接口) │
│ ├── sklearn │
│ ├── tensorflow │
│ ├── pytorch │
│ ├── xgboost │
│ ├── lightgbm │
│ ├── onnx │
│ └── 更多... │
│ │
└─────────────────────────────────────────────────────────────┘
python_function Flavor #
这是所有 MLflow 模型都支持的通用接口:
python
import mlflow
model = mlflow.pyfunc.load_model("models:/my_model/Production")
predictions = model.predict(data)
sklearn Flavor #
python
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(X_train, y_train)
with mlflow.start_run():
mlflow.sklearn.log_model(model, "model")
loaded_model = mlflow.sklearn.load_model("runs:/<run_id>/model")
predictions = loaded_model.predict(X_test)
tensorflow Flavor #
python
import mlflow.tensorflow
import tensorflow as tf
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
with mlflow.start_run():
mlflow.tensorflow.log_model(model, "model")
loaded_model = mlflow.tensorflow.load_model("runs:/<run_id>/model")
predictions = loaded_model.predict(X_test)
pytorch Flavor #
python
import mlflow.pytorch
import torch
import torch.nn as nn
class SimpleModel(nn.Module):
    """Minimal single-layer network used to demonstrate the pytorch flavor."""

    def __init__(self):
        super().__init__()
        # One linear layer mapping 10 input features to 1 output value.
        self.fc = nn.Linear(10, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.fc(x)
model = SimpleModel()
with mlflow.start_run():
    # Log the model as an artifact of the active run.
    mlflow.pytorch.log_model(model, "model")

# Replace <run_id> with the ID of the run that logged the model.
loaded_model = mlflow.pytorch.load_model("runs:/<run_id>/model")
# Switch to inference mode and skip gradient tracking for prediction.
loaded_model.eval()
with torch.no_grad():
    # nn.Linear parameters are float32 by default; cast explicitly so that a
    # float64 input (e.g. a NumPy array) does not raise a dtype mismatch.
    predictions = loaded_model(torch.tensor(X_test, dtype=torch.float32))
xgboost Flavor #
python
import mlflow.xgboost
import xgboost as xgb
model = xgb.XGBClassifier()
model.fit(X_train, y_train)
with mlflow.start_run():
mlflow.xgboost.log_model(model, "model")
loaded_model = mlflow.xgboost.load_model("runs:/<run_id>/model")
predictions = loaded_model.predict(X_test)
模型签名 #
什么是模型签名? #
text
┌─────────────────────────────────────────────────────────────┐
│ 模型签名 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 模型签名定义了模型的输入输出规范: │
│ │
│ ├── 输入数据的类型和形状 │
│ ├── 输出数据的类型和形状 │
│ └── 参数的类型和默认值 │
│ │
│ 作用: │
│ ├── 验证输入数据 │
│ ├── 自动生成 API 文档 │
│ └── 确保部署一致性 │
│ │
└─────────────────────────────────────────────────────────────┘
创建模型签名 #
python
import mlflow
from mlflow.models.signature import infer_signature
import pandas as pd
X_train = pd.DataFrame({"feature1": [1, 2, 3], "feature2": [4, 5, 6]})
y_train = pd.Series([0, 1, 0])
model.fit(X_train, y_train)
predictions = model.predict(X_train)
signature = infer_signature(X_train, predictions)
print(signature)
with mlflow.start_run():
mlflow.sklearn.log_model(model, "model", signature=signature)
签名示例 #
python
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec
input_schema = Schema([
ColSpec("double", "feature1"),
ColSpec("double", "feature2"),
ColSpec("string", "category")
])
output_schema = Schema([
ColSpec("double", "prediction"),
ColSpec("double", "probability")
])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)
签名格式 #
text
输入/输出签名示例:
inputs:
[['feature1', double], ['feature2', double], ['category', string]]
outputs:
[['prediction', double], ['probability', double]]
模型保存与加载 #
保存模型 #
python
import mlflow.sklearn
with mlflow.start_run() as run:
mlflow.sklearn.log_model(
sk_model=model,
artifact_path="model",
conda_env="conda.yaml",
code_paths=["src/"],
signature=signature,
input_example=input_example,
registered_model_name="my_model"
)
run_id = run.info.run_id
model_uri = f"runs:/{run_id}/model"
加载模型 #
python
import mlflow.sklearn
# Load from the artifacts of a specific run (replace <run_id> with a real run ID).
model = mlflow.sklearn.load_model("runs:/<run_id>/model")
# Load the registered model version currently in the "Production" stage.
# NOTE(review): registry stages are deprecated in recent MLflow 2.x releases in
# favour of aliases (e.g. "models:/my_model@champion") - confirm for your version.
model = mlflow.sklearn.load_model("models:/my_model/Production")
# Load a specific registered version by number.
model = mlflow.sklearn.load_model("models:/my_model/1")
# Load the most recently registered version.
model = mlflow.sklearn.load_model("models:/my_model/latest")
使用 pyfunc 加载 #
python
import mlflow.pyfunc
model = mlflow.pyfunc.load_model("models:/my_model/Production")
predictions = model.predict(data)
输入示例 #
添加输入示例 #
python
import mlflow.sklearn
import pandas as pd
input_example = pd.DataFrame({
"feature1": [1.0, 2.0],
"feature2": [3.0, 4.0],
"category": ["A", "B"]
})
with mlflow.start_run():
mlflow.sklearn.log_model(
model,
"model",
input_example=input_example
)
输入示例的作用 #
text
┌─────────────────────────────────────────────────────────────┐
│ 输入示例作用 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 1. 文档作用: │
│ 展示模型期望的输入格式 │
│ │
│ 2. 测试作用: │
│ 用于验证模型加载后是否正常工作 │
│ │
│ 3. 部署作用: │
│ 帮助生成 API 文档和测试请求 │
│ │
│ 4. 自动推断签名: │
│ 从输入示例推断模型签名 │
│ │
└─────────────────────────────────────────────────────────────┘
模型环境 #
Conda 环境 #
yaml
name: mlflow-env
channels:
- conda-forge
- defaults
dependencies:
- python=3.10
- pip
- pip:
- mlflow==2.10.0
- scikit-learn==1.3.0
- pandas==2.0.0
- numpy==1.24.0
requirements.txt #
text
mlflow==2.10.0
scikit-learn==1.3.0
pandas==2.0.0
numpy==1.24.0
通过字典指定环境 #
python
import mlflow.sklearn
with mlflow.start_run():
mlflow.sklearn.log_model(
model,
"model",
conda_env={
'channels': ['conda-forge'],
'dependencies': [
'python=3.10',
'pip',
{'pip': ['mlflow', 'scikit-learn']}
],
'name': 'mlflow-env'
}
)
自定义模型 #
创建自定义 PyFunc 模型 #
python
import mlflow.pyfunc
import pandas as pd
class CustomModel(mlflow.pyfunc.PythonModel):
    """PyFunc wrapper returning each prediction with its top class probability."""

    def load_context(self, context):
        """Load the wrapped estimator from the ``model_path`` artifact."""
        # ``sklearn.externals.joblib`` has been removed from scikit-learn;
        # the standalone ``joblib`` package is the supported replacement.
        import joblib

        self.model = joblib.load(context.artifacts["model_path"])

    def predict(self, context, model_input):
        """Return a DataFrame with ``prediction`` and ``probability`` columns."""
        predictions = self.model.predict(model_input)
        probabilities = self.model.predict_proba(model_input)
        return pd.DataFrame({
            "prediction": predictions,
            # Highest class probability in each row of predict_proba's output.
            "probability": probabilities.max(axis=1),
        })
with mlflow.start_run():
mlflow.pyfunc.log_model(
artifact_path="model",
python_model=CustomModel(),
artifacts={"model_path": "model.joblib"},
code_paths=["src/"]
)
自定义模型示例:预处理 + 预测 #
python
import mlflow.pyfunc
import pandas as pd
from sklearn.preprocessing import StandardScaler
class PreprocessedModel(mlflow.pyfunc.PythonModel):
    """PyFunc model that scales its input before delegating to the estimator."""

    def load_context(self, context):
        """Load the scaler and the estimator from the logged artifacts."""
        import joblib

        self.scaler = joblib.load(context.artifacts["scaler"])
        self.model = joblib.load(context.artifacts["model"])

    def predict(self, context, model_input):
        """Transform ``model_input`` with the scaler and return the predictions."""
        scaled_input = self.scaler.transform(model_input)
        predictions = self.model.predict(scaled_input)
        return predictions
with mlflow.start_run():
mlflow.pyfunc.log_model(
artifact_path="model",
python_model=PreprocessedModel(),
artifacts={
"scaler": "scaler.joblib",
"model": "model.joblib"
}
)
模型持久化 #
保存到本地 #
python
import mlflow.sklearn
mlflow.sklearn.save_model(
model,
path="./my_model",
conda_env="conda.yaml",
signature=signature
)
从本地加载 #
python
import mlflow.sklearn
model = mlflow.sklearn.load_model("./my_model")
保存到远程存储 #
python
import mlflow.sklearn
mlflow.sklearn.log_model(
model,
"model",
registered_model_name="my_model"
)
模型别名 #
设置模型别名 #
python
from mlflow.tracking import MlflowClient
client = MlflowClient()
client.set_registered_model_alias(
name="my_model",
alias="champion",
version="5"
)
client.set_registered_model_alias(
name="my_model",
alias="challenger",
version="6"
)
使用别名加载模型 #
python
import mlflow.pyfunc
model = mlflow.pyfunc.load_model("models:/my_model@champion")
模型评估 #
使用 MLflow 评估模型 #
python
import mlflow
from mlflow.models import evaluate
with mlflow.start_run():
    model_info = mlflow.sklearn.log_model(model, "model")
    # Evaluate the logged model against the labelled test data.
    result = evaluate(
        model=model_info.model_uri,
        data=test_data,
        targets="target_column",
        model_type="classifier",
        evaluators=["default"],
    )

# The default classifier evaluator reports scikit-learn style metric names
# ("accuracy_score", "roc_auc", ...); "accuracy" / "auc" are not valid keys.
print(f"Accuracy: {result.metrics['accuracy_score']}")
# NOTE(review): roc_auc is only reported when class probabilities are
# available - confirm against the mlflow.evaluate documentation.
print(f"AUC: {result.metrics['roc_auc']}")
评估指标 #
text
┌─────────────────────────────────────────────────────────────┐
│ 评估指标 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 分类模型: │
│ ├── accuracy │
│ ├── precision / recall / f1_score │
│ ├── auc / roc_auc │
│ ├── log_loss │
│ └── confusion_matrix │
│ │
│ 回归模型: │
│ ├── mae (Mean Absolute Error) │
│ ├── mse (Mean Squared Error) │
│ ├── rmse (Root Mean Squared Error) │
│ ├── r2 (R-squared) │
│ └── mape (Mean Absolute Percentage Error) │
│ │
└─────────────────────────────────────────────────────────────┘
最佳实践 #
1. 始终添加签名 #
python
import mlflow
from mlflow.models.signature import infer_signature
signature = infer_signature(X_train, model.predict(X_train))
mlflow.sklearn.log_model(model, "model", signature=signature)
2. 添加输入示例 #
python
import mlflow
mlflow.sklearn.log_model(
model,
"model",
input_example=X_train.head()
)
3. 指定环境依赖 #
python
import mlflow
mlflow.sklearn.log_model(
model,
"model",
pip_requirements=[
"scikit-learn==1.3.0",
"pandas==2.0.0"
]
)
4. 注册模型 #
python
import mlflow
mlflow.sklearn.log_model(
model,
"model",
registered_model_name="my_model"
)
下一步 #
现在你已经掌握了 MLflow Models 的核心功能,接下来学习 项目打包,了解如何打包可复现的 ML 项目!
最后更新:2026-04-04