生产环境部署 #
部署概述 #
生产环境部署需要考虑可用性、可扩展性、安全性和可维护性。
text
┌─────────────────────────────────────────────────────────────┐
│ 生产环境架构 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 接入层: │
│ ├── 负载均衡 (Nginx/HAProxy) │
│ ├── API 网关 │
│ └── SSL 终止 │
│ │
│ 服务层: │
│ ├── Whisper API 服务 │
│ ├── 任务队列 │
│ └── 结果缓存 │
│ │
│ 基础设施: │
│ ├── 容器编排 (Kubernetes) │
│ ├── 监控告警 (Prometheus/Grafana) │
│ └── 日志收集 (ELK) │
│ │
└─────────────────────────────────────────────────────────────┘
Docker 部署 #
Dockerfile #
dockerfile
FROM python:3.10-slim

WORKDIR /app

# ffmpeg is required by whisper for audio decoding; git is needed when pip
# installs packages from git URLs. Clean the apt lists to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached across code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

RUN mkdir -p /app/models

# Bug fix: docker-compose passes a WHISPER_MODEL build arg, but without an ARG
# declaration it was silently ignored. Declare the ARG (defaulting to "base")
# and mirror it into ENV so the same value is visible to main.py at runtime.
ARG WHISPER_MODEL=base
ENV WHISPER_MODEL=${WHISPER_MODEL}
ENV MODEL_DIR=/app/models

# Pre-download the model into the image so the first request does not block
# on a large download.
RUN python -c "import whisper; whisper.load_model('${WHISPER_MODEL}', download_root='${MODEL_DIR}')"

EXPOSE 8000

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
requirements.txt #
text
# Pinned runtime dependencies for the Whisper API service.
openai-whisper==20231117
fastapi==0.104.1
uvicorn==0.24.0
python-multipart==0.0.6  # required by FastAPI for multipart/form-data uploads
torch==2.1.0
torchaudio==2.1.0
main.py #
python
import whisper
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import os
import tempfile
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Whisper API", version="1.0.0")

# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True (the CORS spec forbids wildcard origins with
# credentials) — restrict origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

MODEL_SIZE = os.getenv("WHISPER_MODEL", "base")
MODEL_DIR = os.getenv("MODEL_DIR", "/app/models")

# Load the model once at import time; uvicorn only starts accepting requests
# after the module (and hence the model) has finished loading.
logger.info(f"Loading model: {MODEL_SIZE}")
model = whisper.load_model(MODEL_SIZE, download_root=MODEL_DIR)
logger.info("Model loaded successfully")


@app.post("/transcribe")
async def transcribe(
    file: UploadFile = File(...),
    language: str = None,
    task: str = "transcribe"
):
    """Transcribe (or translate to English) an uploaded audio file.

    Args:
        file: uploaded audio in any format ffmpeg can decode.
        language: optional source-language hint (e.g. "en", "zh"); None = auto-detect.
        task: "transcribe" (keep source language) or "translate" (to English).

    Returns:
        dict with "text", detected "language", and timestamped "segments".

    Raises:
        HTTPException: 400 for an unknown task, 500 on transcription failure.
    """
    if task not in ("transcribe", "translate"):
        raise HTTPException(status_code=400, detail=f"Invalid task: {task}")

    logger.info(f"Processing file: {file.filename}")

    # Whisper reads audio from a filesystem path via ffmpeg, so spill the
    # upload to a temp file. Keep the original extension as a format hint;
    # filename may be None for raw multipart bodies.
    suffix = os.path.splitext(file.filename)[1] if file.filename else ""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        # Bug fix: whisper models have no .translate() method — translation
        # is selected via the `task` argument of transcribe().
        result = model.transcribe(tmp_path, language=language, task=task)
        logger.info(f"Transcription completed: {len(result['text'])} characters")
        return {
            "text": result["text"],
            "language": result["language"],
            "segments": result["segments"]
        }
    except Exception as e:
        logger.error(f"Transcription error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Always remove the temp file, even when transcription fails.
        os.unlink(tmp_path)


@app.get("/health")
async def health():
    """Liveness probe: the process is up; reports which model it serves."""
    return {"status": "healthy", "model": MODEL_SIZE}


@app.get("/ready")
async def ready():
    """Readiness probe: module import (and thus model load) has completed."""
    return {"status": "ready"}
Docker Compose #
yaml
version: '3.8'

services:
  whisper-api:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        WHISPER_MODEL: base
    ports:
      - "8000:8000"
    environment:
      - WHISPER_MODEL=base
      - MODEL_DIR=/app/models
    volumes:
      # Persist model weights and the whisper download cache across rebuilds.
      - whisper-models:/app/models
      - whisper-cache:/root/.cache
    deploy:
      resources:
        reservations:
          devices:
            # Requires the NVIDIA Container Toolkit on the host.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # Bug fix: python:3.10-slim does not ship curl, so a curl-based check
      # always fails and marks the container unhealthy. Probe the /health
      # endpoint with the Python standard library instead.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Model loading can take a while on first start; don't count failures
      # during warm-up.
      start_period: 60s

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - whisper-api

volumes:
  whisper-models:
  whisper-cache:
nginx.conf #
nginx
events {
    worker_connections 1024;
}

http {
    # Single-backend upstream; add more `server` lines when scaling out.
    upstream whisper_backend {
        server whisper-api:8000;
    }

    server {
        listen 80;
        server_name localhost;

        # Audio uploads can be large; nginx's default body limit is 1 MB.
        client_max_body_size 100M;

        location / {
            proxy_pass http://whisper_backend;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Transcribing long audio can take minutes; raise the default 60s
            # proxy timeouts so long-running requests are not cut off.
            proxy_connect_timeout 300s;
            proxy_send_timeout 300s;
            proxy_read_timeout 300s;
        }

        # Direct pass-through for load-balancer / orchestrator health checks.
        location /health {
            proxy_pass http://whisper_backend/health;
        }
    }
}
Kubernetes 部署 #
Deployment #
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: whisper-api
  labels:
    app: whisper-api
spec:
  # NOTE(review): if the HPA defined elsewhere targets this Deployment, omit
  # replicas here — the autoscaler owns the replica count.
  replicas: 3
  selector:
    matchLabels:
      app: whisper-api
  template:
    metadata:
      labels:
        app: whisper-api
    spec:
      containers:
        - name: whisper-api
          # NOTE(review): pin an immutable tag/digest instead of :latest so
          # rollouts are reproducible and rollbacks are possible.
          image: whisper-api:latest
          ports:
            - containerPort: 8000
          resources:
            requests:
              memory: "4Gi"
              cpu: "2"
              # GPU request must equal the limit; Kubernetes does not allow
              # overcommitting extended resources such as GPUs.
              nvidia.com/gpu: 1
            limits:
              memory: "8Gi"
              cpu: "4"
              nvidia.com/gpu: 1
          env:
            - name: WHISPER_MODEL
              value: "base"
            - name: MODEL_DIR
              value: "/app/models"
          volumeMounts:
            - name: models
              mountPath: /app/models
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        # Shared PVC so model weights are downloaded once, not per pod.
        - name: models
          persistentVolumeClaim:
            claimName: whisper-models-pvc
      # Schedule only onto GPU nodes labeled accelerator=nvidia.
      nodeSelector:
        accelerator: nvidia
Service #
yaml
apiVersion: v1
kind: Service
metadata:
  name: whisper-api
spec:
  selector:
    app: whisper-api
  ports:
    # Cluster-internal port 80 forwards to the container's 8000.
    - port: 80
      targetPort: 8000
  # ClusterIP only — external traffic enters through the Ingress.
  type: ClusterIP
Ingress #
yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: whisper-api-ingress
  annotations:
    # Match the API's upload limit and long transcription times (ingress-nginx
    # defaults: 1m body size, 60s read timeout).
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
spec:
  rules:
    - host: whisper.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: whisper-api
                port:
                  number: 80
HPA (Horizontal Pod Autoscaler) #
yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: whisper-api-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: whisper-api
  # Once applied, the HPA owns the replica count (between these bounds);
  # remove spec.replicas from the target Deployment to avoid conflicts.
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # NOTE(review): a model server holds the loaded model in memory at all
    # times, so memory utilization is a weak scaling signal here — confirm it
    # actually varies with load before relying on it.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
监控配置 #
Prometheus 配置 #
yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
    scrape_configs:
      - job_name: 'whisper-api'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          # Keep only pods labeled app=whisper-api.
          - source_labels: [__meta_kubernetes_pod_label_app]
            action: keep
            regex: whisper-api
          # NOTE(review): this rule keeps only pods annotated
          # prometheus.io/scrape: "true". The Deployment shown in this guide
          # does not set that annotation, so either add it to the pod template
          # or drop this rule — otherwise nothing is scraped.
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          # Allow pods to override the metrics path via prometheus.io/path.
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
应用监控指标 #
python
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
from fastapi import Response

# Metrics are process-global; labels are limited to language code and model
# size to keep cardinality low.
TRANSCRIPTION_COUNT = Counter(
    'whisper_transcription_total',
    'Total number of transcriptions',
    ['language', 'model']
)
TRANSCRIPTION_DURATION = Histogram(
    'whisper_transcription_duration_seconds',
    'Time spent processing transcription',
    ['model']
)
AUDIO_DURATION = Histogram(
    'whisper_audio_duration_seconds',
    'Duration of processed audio',
    ['model']
)


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...), language: str = None):
    """Transcribe an upload while recording Prometheus metrics."""
    import os
    import tempfile
    import time

    start_time = time.time()

    # Bug fix: the original referenced tmp_path without ever creating it —
    # persist the upload to a temp file for whisper/ffmpeg to read.
    suffix = os.path.splitext(file.filename)[1] if file.filename else ""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        result = model.transcribe(tmp_path, language=language)
    finally:
        os.unlink(tmp_path)

    TRANSCRIPTION_COUNT.labels(
        language=result["language"],
        model=MODEL_SIZE
    ).inc()
    TRANSCRIPTION_DURATION.labels(model=MODEL_SIZE).observe(
        time.time() - start_time
    )
    # The end timestamp of the last segment is the duration of audio whisper
    # actually decoded.
    if result["segments"]:
        AUDIO_DURATION.labels(model=MODEL_SIZE).observe(
            result["segments"][-1]["end"]
        )
    return result


@app.get("/metrics")
async def metrics():
    """Expose metrics in the Prometheus text exposition format."""
    # Bug fix: advertise the full exposition content type (which carries the
    # format version) rather than bare "text/plain".
    return Response(
        content=generate_latest(),
        media_type=CONTENT_TYPE_LATEST
    )
日志配置 #
结构化日志 #
python
import logging
import json
from datetime import datetime, timezone


class JSONFormatter(logging.Formatter):
    """Render log records as single-line JSON for ELK-style ingestion."""

    # Attributes present on every LogRecord (derived from a template record);
    # anything beyond these was supplied by the caller via `extra=` and should
    # be emitted as structured data. "message"/"asctime" are added by Formatter.
    _STANDARD_ATTRS = frozenset(
        vars(logging.LogRecord("", 0, "", 0, "", (), None))
    ) | {"message", "asctime"}

    def format(self, record):
        log_entry = {
            # timezone-aware UTC; datetime.utcnow() is deprecated since 3.12
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": record.levelname,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno
        }
        # Bug fix: logging spreads `extra=` keys directly onto the record as
        # attributes — a literal `record.extra` attribute almost never exists,
        # so the old `hasattr(record, 'extra')` check silently dropped every
        # structured field. Emit any non-standard attribute instead.
        for key, value in record.__dict__.items():
            if key not in self._STANDARD_ATTRS and key not in log_entry:
                log_entry[key] = value
        # default=str keeps non-JSON-serializable extras from crashing logging
        return json.dumps(log_entry, default=str)
# Route this module's records through the JSON formatter. basicConfig() has
# already installed a plain-text handler on the root logger, so propagation
# must be disabled — otherwise every message is emitted twice (once as JSON,
# once as plain text).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
handler.setFormatter(JSONFormatter())
logger.handlers = [handler]
logger.propagate = False


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an upload, emitting structured JSON logs before and after."""
    import os
    import tempfile

    logger.info(
        "Processing transcription",
        extra={
            "filename": file.filename,
            "content_type": file.content_type
        }
    )

    # Bug fix: tmp_path was referenced but never created — write the upload
    # to a temp file for whisper/ffmpeg to read, and clean it up afterwards.
    suffix = os.path.splitext(file.filename)[1] if file.filename else ""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        result = model.transcribe(tmp_path)
    finally:
        os.unlink(tmp_path)

    logger.info(
        "Transcription completed",
        extra={
            "text_length": len(result["text"]),
            "language": result["language"],
            "segments": len(result["segments"])
        }
    )
    return result
安全配置 #
API 认证 #
python
from fastapi import Depends, Security, HTTPException
from fastapi.security import APIKeyHeader

# WARNING: hard-coded keys are for demonstration only — load them from a
# secret store or environment variables in production, and never commit
# real keys to version control.
API_KEYS = {
    "key1": "user1",
    "key2": "user2"
}

api_key_header = APIKeyHeader(name="X-API-Key")


async def get_api_key(api_key: str = Security(api_key_header)):
    """Resolve the X-API-Key header to its owning user, or reject with 403."""
    if api_key not in API_KEYS:
        raise HTTPException(status_code=403, detail="Invalid API Key")
    return API_KEYS[api_key]


@app.post("/transcribe")
async def transcribe(
    file: UploadFile = File(...),
    # Bug fix: Depends was used but never imported in the original snippet.
    user: str = Depends(get_api_key)
):
    """Authenticated transcription endpoint; `user` is the key's owner."""
    # NOTE(review): tmp_path must be created from the upload as in main.py;
    # abbreviated here for the auth example.
    result = model.transcribe(tmp_path)
    return result
速率限制 #
python
from fastapi import Request
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

# Rate-limit by client IP. Bug fix: SlowAPI requires the limiter to be
# attached to app.state and an exception handler registered — without these
# the decorator errors at request time instead of returning HTTP 429.
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


@app.post("/transcribe")
@limiter.limit("10/minute")
async def transcribe(request: Request, file: UploadFile = File(...)):
    """Transcription endpoint limited to 10 requests per minute per client IP."""
    # NOTE(review): tmp_path must be created from the upload as in main.py;
    # abbreviated here for the rate-limit example.
    result = model.transcribe(tmp_path)
    return result
部署检查清单 #
text
部署前检查:
├── 模型文件准备
│ ├── 确认模型已下载
│ ├── 检查模型文件完整性
│ └── 配置模型缓存路径
│
├── 资源配置
│ ├── GPU 内存充足
│ ├── 系统内存充足
│ └── 存储空间充足
│
├── 网络配置
│ ├── 端口映射正确
│ ├── 负载均衡配置
│ └── SSL 证书配置
│
└── 监控配置
├── 健康检查端点
├── 指标收集配置
└── 日志收集配置
部署后验证:
├── 功能测试
│ ├── 基本转录测试
│ ├── 翻译功能测试
│ └── 错误处理测试
│
├── 性能测试
│ ├── 响应时间测试
│ ├── 并发测试
│ └── 资源使用监控
│
└── 安全测试
├── 认证测试
├── 速率限制测试
└── 输入验证测试
下一步 #
恭喜你完成了 Whisper 文档的学习!现在你已经掌握了从基础到高级的所有知识,可以开始构建自己的语音识别应用了。
最后更新:2026-04-05