生产环境部署 #
部署概述 #
生产环境部署需要考虑可用性、可扩展性、安全性和可维护性。
text
┌─────────────────────────────────────────────────────────────┐
│ 生产环境架构 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 接入层: │
│ ├── 负载均衡 (Nginx/HAProxy) │
│ ├── API 网关 │
│ └── SSL 终止 │
│ │
│ 服务层: │
│ ├── Whisper API 服务 │
│ ├── 任务队列 │
│ └── 结果缓存 │
│ │
│ 基础设施: │
│ ├── 容器编排 (Kubernetes) │
│ ├── 监控告警 (Prometheus/Grafana) │
│ └── 日志收集 (ELK) │
│ │
└─────────────────────────────────────────────────────────────┘
Docker 部署 #
Dockerfile #
dockerfile
FROM python:3.10-slim

WORKDIR /app

# ffmpeg is required by whisper for audio decoding; git is needed when pip
# installs packages from git URLs. Clean the apt lists to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached across code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

RUN mkdir -p /app/models

# Bug fix: docker-compose passes a WHISPER_MODEL build arg, but without an ARG
# declaration it was silently ignored. Declare the ARG (defaulting to "base")
# and mirror it into ENV so the same value is visible to main.py at runtime.
ARG WHISPER_MODEL=base
ENV WHISPER_MODEL=${WHISPER_MODEL}
ENV MODEL_DIR=/app/models

# Pre-download the model into the image so the first request does not block
# on a large download.
RUN python -c "import whisper; whisper.load_model('${WHISPER_MODEL}', download_root='${MODEL_DIR}')"

EXPOSE 8000

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
requirements.txt #
text
# Pinned runtime dependencies for the Whisper API service.
openai-whisper==20231117
fastapi==0.104.1
uvicorn==0.24.0
python-multipart==0.0.6  # required by FastAPI for multipart/form-data uploads
torch==2.1.0
torchaudio==2.1.0
main.py #
python
import whisper
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import os
import tempfile
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Whisper API", version="1.0.0")

# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True (the CORS spec forbids wildcard origins with
# credentials) — restrict origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

MODEL_SIZE = os.getenv("WHISPER_MODEL", "base")
MODEL_DIR = os.getenv("MODEL_DIR", "/app/models")

# Load the model once at import time; uvicorn only starts accepting requests
# after the module (and hence the model) has finished loading.
logger.info(f"Loading model: {MODEL_SIZE}")
model = whisper.load_model(MODEL_SIZE, download_root=MODEL_DIR)
logger.info("Model loaded successfully")


@app.post("/transcribe")
async def transcribe(
    file: UploadFile = File(...),
    language: str = None,
    task: str = "transcribe"
):
    """Transcribe (or translate to English) an uploaded audio file.

    Args:
        file: uploaded audio in any format ffmpeg can decode.
        language: optional source-language hint (e.g. "en", "zh"); None = auto-detect.
        task: "transcribe" (keep source language) or "translate" (to English).

    Returns:
        dict with "text", detected "language", and timestamped "segments".

    Raises:
        HTTPException: 400 for an unknown task, 500 on transcription failure.
    """
    if task not in ("transcribe", "translate"):
        raise HTTPException(status_code=400, detail=f"Invalid task: {task}")

    logger.info(f"Processing file: {file.filename}")

    # Whisper reads audio from a filesystem path via ffmpeg, so spill the
    # upload to a temp file. Keep the original extension as a format hint;
    # filename may be None for raw multipart bodies.
    suffix = os.path.splitext(file.filename)[1] if file.filename else ""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        # Bug fix: whisper models have no .translate() method — translation
        # is selected via the `task` argument of transcribe().
        result = model.transcribe(tmp_path, language=language, task=task)
        logger.info(f"Transcription completed: {len(result['text'])} characters")
        return {
            "text": result["text"],
            "language": result["language"],
            "segments": result["segments"]
        }
    except Exception as e:
        logger.error(f"Transcription error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always remove the temp file, even when transcription fails.
        os.unlink(tmp_path)


@app.get("/health")
async def health():
    """Liveness probe: the process is up; reports which model it serves."""
    return {"status": "healthy", "model": MODEL_SIZE}


@app.get("/ready")
async def ready():
    """Readiness probe: module import (and thus model load) has completed."""
    return {"status": "ready"}
Docker Compose #
yaml
version: '3.8'

services:
  whisper-api:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        WHISPER_MODEL: base
    ports:
      - "8000:8000"
    environment:
      - WHISPER_MODEL=base
      - MODEL_DIR=/app/models
    volumes:
      # Persist model weights and the whisper download cache across rebuilds.
      - whisper-models:/app/models
      - whisper-cache:/root/.cache
    deploy:
      resources:
        reservations:
          devices:
            # Requires the NVIDIA Container Toolkit on the host.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # Bug fix: python:3.10-slim does not ship curl, so a curl-based check
      # always fails and marks the container unhealthy. Probe the /health
      # endpoint with the Python standard library instead.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Model loading can take a while on first start; don't count failures
      # during warm-up.
      start_period: 60s

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - whisper-api

volumes:
  whisper-models:
  whisper-cache:
nginx.conf #
nginx
events {
    worker_connections 1024;
}

http {
    # Single-backend upstream; add more `server` lines when scaling out.
    upstream whisper_backend {
        server whisper-api:8000;
    }

    server {
        listen 80;
        server_name localhost;

        # Audio uploads can be large; nginx's default body limit is 1 MB.
        client_max_body_size 100M;

        location / {
            proxy_pass http://whisper_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Transcribing long audio can take minutes; raise the default 60s
            # proxy timeouts so long-running requests are not cut off.
            proxy_connect_timeout 300s;
            proxy_send_timeout 300s;
            proxy_read_timeout 300s;
        }

        # Direct pass-through for load-balancer / orchestrator health checks.
        location /health {
            proxy_pass http://whisper_backend/health;
        }
    }
}
Kubernetes 部署 #
Deployment #
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: whisper-api
  labels:
    app: whisper-api
spec:
  # NOTE(review): if the HPA defined elsewhere targets this Deployment, omit
  # replicas here — the autoscaler owns the replica count.
  replicas: 3
  selector:
    matchLabels:
      app: whisper-api
  template:
    metadata:
      labels:
        app: whisper-api
    spec:
      containers:
        - name: whisper-api
          # NOTE(review): pin an immutable tag/digest instead of :latest so
          # rollouts are reproducible and rollbacks are possible.
          image: whisper-api:latest
          ports:
            - containerPort: 8000
          resources:
            requests:
              memory: "4Gi"
              cpu: "2"
              # GPU request must equal the limit; Kubernetes does not allow
              # overcommitting extended resources such as GPUs.
              nvidia.com/gpu: 1
            limits:
              memory: "8Gi"
              cpu: "4"
              nvidia.com/gpu: 1
          env:
            - name: WHISPER_MODEL
              value: "base"
            - name: MODEL_DIR
              value: "/app/models"
          volumeMounts:
            - name: models
              mountPath: /app/models
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        # Shared PVC so model weights are downloaded once, not per pod.
        - name: models
          persistentVolumeClaim:
            claimName: whisper-models-pvc
      # Schedule only onto GPU nodes labeled accelerator=nvidia.
      nodeSelector:
        accelerator: nvidia
Service #
yaml
apiVersion: v1
kind: Service
metadata:
  name: whisper-api
spec:
  selector:
    app: whisper-api
  ports:
    # Cluster-internal port 80 forwards to the container's 8000.
    - port: 80
      targetPort: 8000
  # ClusterIP only — external traffic enters through the Ingress.
  type: ClusterIP
Ingress #
yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: whisper-api-ingress
  annotations:
    # Match the API's upload limit and long transcription times (ingress-nginx
    # defaults: 1m body size, 60s read timeout).
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
spec:
  rules:
    - host: whisper.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: whisper-api
                port:
                  number: 80
HPA (Horizontal Pod Autoscaler) #
yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: whisper-api-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: whisper-api
  # Once applied, the HPA owns the replica count (between these bounds);
  # remove spec.replicas from the target Deployment to avoid conflicts.
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # NOTE(review): a model server holds the loaded model in memory at all
    # times, so memory utilization is a weak scaling signal here — confirm it
    # actually varies with load before relying on it.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
监控配置 #
Prometheus 配置 #
yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
    scrape_configs:
      - job_name: 'whisper-api'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          # Keep only pods labeled app=whisper-api.
          - source_labels: [__meta_kubernetes_pod_label_app]
            action: keep
            regex: whisper-api
          # NOTE(review): this rule keeps only pods annotated
          # prometheus.io/scrape: "true". The Deployment shown in this guide
          # does not set that annotation, so either add it to the pod template
          # or drop this rule — otherwise nothing is scraped.
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          # Allow pods to override the metrics path via prometheus.io/path.
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
应用监控指标 #
python
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
from fastapi import Response

# Metrics are process-global; labels are limited to language code and model
# size to keep cardinality low.
TRANSCRIPTION_COUNT = Counter(
    'whisper_transcription_total',
    'Total number of transcriptions',
    ['language', 'model']
)
TRANSCRIPTION_DURATION = Histogram(
    'whisper_transcription_duration_seconds',
    'Time spent processing transcription',
    ['model']
)
AUDIO_DURATION = Histogram(
    'whisper_audio_duration_seconds',
    'Duration of processed audio',
    ['model']
)


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...), language: str = None):
    """Transcribe an upload while recording Prometheus metrics."""
    import os
    import tempfile
    import time

    start_time = time.time()

    # Bug fix: the original referenced tmp_path without ever creating it —
    # persist the upload to a temp file for whisper/ffmpeg to read.
    suffix = os.path.splitext(file.filename)[1] if file.filename else ""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        result = model.transcribe(tmp_path, language=language)
    finally:
        os.unlink(tmp_path)

    TRANSCRIPTION_COUNT.labels(
        language=result["language"],
        model=MODEL_SIZE
    ).inc()
    TRANSCRIPTION_DURATION.labels(model=MODEL_SIZE).observe(
        time.time() - start_time
    )
    # The end timestamp of the last segment is the duration of audio whisper
    # actually decoded.
    if result["segments"]:
        AUDIO_DURATION.labels(model=MODEL_SIZE).observe(
            result["segments"][-1]["end"]
        )
    return result


@app.get("/metrics")
async def metrics():
    """Expose metrics in the Prometheus text exposition format."""
    # Bug fix: advertise the full exposition content type (which carries the
    # format version) rather than bare "text/plain".
    return Response(
        content=generate_latest(),
        media_type=CONTENT_TYPE_LATEST
    )
日志配置 #
结构化日志 #
python
import logging
import json
from datetime import datetime, timezone


class JSONFormatter(logging.Formatter):
    """Render log records as single-line JSON for ELK-style ingestion."""

    # Attributes present on every LogRecord (derived from a template record);
    # anything beyond these was supplied by the caller via `extra=` and should
    # be emitted as structured data. "message"/"asctime" are added by Formatter.
    _STANDARD_ATTRS = frozenset(
        vars(logging.LogRecord("", 0, "", 0, "", (), None))
    ) | {"message", "asctime"}

    def format(self, record):
        log_entry = {
            # timezone-aware UTC; datetime.utcnow() is deprecated since 3.12
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": record.levelname,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno
        }
        # Bug fix: logging spreads `extra=` keys directly onto the record as
        # attributes — a literal `record.extra` attribute almost never exists,
        # so the old `hasattr(record, 'extra')` check silently dropped every
        # structured field. Emit any non-standard attribute instead.
        for key, value in record.__dict__.items():
            if key not in self._STANDARD_ATTRS and key not in log_entry:
                log_entry[key] = value
        # default=str keeps non-JSON-serializable extras from crashing logging
        return json.dumps(log_entry, default=str)
# Route this module's records through the JSON formatter. basicConfig() has
# already installed a plain-text handler on the root logger, so propagation
# must be disabled — otherwise every message is emitted twice (once as JSON,
# once as plain text).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
handler.setFormatter(JSONFormatter())
logger.handlers = [handler]
logger.propagate = False


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an upload, emitting structured JSON logs before and after."""
    import os
    import tempfile

    logger.info(
        "Processing transcription",
        extra={
            "filename": file.filename,
            "content_type": file.content_type
        }
    )

    # Bug fix: tmp_path was referenced but never created — write the upload
    # to a temp file for whisper/ffmpeg to read, and clean it up afterwards.
    suffix = os.path.splitext(file.filename)[1] if file.filename else ""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        result = model.transcribe(tmp_path)
    finally:
        os.unlink(tmp_path)

    logger.info(
        "Transcription completed",
        extra={
            "text_length": len(result["text"]),
            "language": result["language"],
            "segments": len(result["segments"])
        }
    )
    return result
安全配置 #
API 认证 #
python
from fastapi import Depends, Security, HTTPException
from fastapi.security import APIKeyHeader

# WARNING: hard-coded keys are for demonstration only — load them from a
# secret store or environment variables in production, and never commit
# real keys to version control.
API_KEYS = {
    "key1": "user1",
    "key2": "user2"
}

api_key_header = APIKeyHeader(name="X-API-Key")


async def get_api_key(api_key: str = Security(api_key_header)):
    """Resolve the X-API-Key header to its owning user, or reject with 403."""
    if api_key not in API_KEYS:
        raise HTTPException(status_code=403, detail="Invalid API Key")
    return API_KEYS[api_key]


@app.post("/transcribe")
async def transcribe(
    file: UploadFile = File(...),
    # Bug fix: Depends was used but never imported in the original snippet.
    user: str = Depends(get_api_key)
):
    """Authenticated transcription endpoint; `user` is the key's owner."""
    # NOTE(review): tmp_path must be created from the upload as in main.py;
    # abbreviated here for the auth example.
    result = model.transcribe(tmp_path)
    return result
速率限制 #
python
from fastapi import Request
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

# Rate-limit by client IP. Bug fix: SlowAPI requires the limiter to be
# attached to app.state and an exception handler registered — without these
# the decorator errors at request time instead of returning HTTP 429.
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


@app.post("/transcribe")
@limiter.limit("10/minute")
async def transcribe(request: Request, file: UploadFile = File(...)):
    """Transcription endpoint limited to 10 requests per minute per client IP."""
    # NOTE(review): tmp_path must be created from the upload as in main.py;
    # abbreviated here for the rate-limit example.
    result = model.transcribe(tmp_path)
    return result
部署检查清单 #
text
部署前检查:
├── 模型文件准备
│ ├── 确认模型已下载
│ ├── 检查模型文件完整性
│ └── 配置模型缓存路径
│
├── 资源配置
│ ├── GPU 内存充足
│ ├── 系统内存充足
│ └── 存储空间充足
│
├── 网络配置
│ ├── 端口映射正确
│ ├── 负载均衡配置
│ └── SSL 证书配置
│
└── 监控配置
├── 健康检查端点
├── 指标收集配置
└── 日志收集配置
部署后验证:
├── 功能测试
│ ├── 基本转录测试
│ ├── 翻译功能测试
│ └── 错误处理测试
│
├── 性能测试
│ ├── 响应时间测试
│ ├── 并发测试
│ └── 资源使用监控
│
└── 安全测试
├── 认证测试
├── 速率限制测试
└── 输入验证测试
下一步 #
恭喜你完成了 Whisper 文档的学习!现在你已经掌握了从基础到高级的所有知识,可以开始构建自己的语音识别应用了。
最后更新:2026-04-05