生产部署 #
部署架构 #
text
┌─────────────────────────────────────────────────────────────┐
│ 生产环境架构 │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ Load Balancer│ │
│ └──────┬──────┘ │
│ │ │
│ ┌─────────────────┼─────────────────┐ │
│ │ │ │ │
│ ┌────▼────┐ ┌────▼────┐ ┌────▼────┐ │
│ │ TTS Pod │ │ TTS Pod │ │ TTS Pod │ │
│ │ #1 │ │ #2 │ │ #3 │ │
│ └────┬────┘ └────┬────┘ └────┬────┘ │
│ │ │ │ │
│ └────────────────┼────────────────┘ │
│ │ │
│ ┌─────▼─────┐ │
│ │ Redis │ │
│ │ (Cache) │ │
│ └───────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
Docker 部署 #
Dockerfile #
dockerfile
# Use the official PyTorch runtime image (CUDA 11.7 + cuDNN 8)
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
# Set the working directory
WORKDIR /app
# Install system dependencies (phonemizer backend, audio I/O, transcoding)
RUN apt-get update && apt-get install -y \
espeak-ng \
libsndfile1 \
ffmpeg \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Install Coqui TTS
RUN pip install --no-cache-dir TTS
# Copy the application code
COPY . .
# Pre-download the model (optional: bakes it into the image so pods start faster)
RUN python -c "from TTS.api import TTS; TTS('tts_models/en/ljspeech/vits')"
# Expose the server port
EXPOSE 5002
# Startup command
CMD ["tts-server", "--model_name", "tts_models/en/ljspeech/vits", "--host", "0.0.0.0", "--port", "5002"]
Docker Compose #
yaml
# docker-compose.yml — TTS server + Redis cache + nginx front-end.
# NOTE(review): indentation restored; the extracted source had lost all
# YAML structure and was not parseable.
version: '3.8'

services:
  tts-server:
    build: .
    ports:
      - "5002:5002"
    environment:
      - CUDA_VISIBLE_DEVICES=0
    volumes:
      # Persist downloaded models across container rebuilds.
      - ./models:/root/.local/share/tts
      - ./output:/app/output
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:5002/"]
      interval: 30s
      timeout: 10s
      retries: 3
    restart: unless-stopped

  redis:
    image: redis:alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    restart: unless-stopped

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - tts-server
    restart: unless-stopped

volumes:
  redis_data:
构建和运行 #
bash
# Build the image
docker build -t tts-server:latest .
# Run the container (GPU-enabled, model cache mounted from the host)
docker run -d \
--name tts-server \
--gpus all \
-p 5002:5002 \
-v $(pwd)/models:/root/.local/share/tts \
tts-server:latest
# Or start the whole stack via Docker Compose
docker-compose up -d
Kubernetes 部署 #
Deployment 配置 #
yaml
# Kubernetes Deployment: 3 GPU-backed TTS replicas sharing a model PVC.
# NOTE(review): indentation restored; the extracted source had lost all
# YAML structure and was not parseable.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: tts-server
  labels:
    app: tts-server
spec:
  replicas: 3
  selector:
    matchLabels:
      app: tts-server
  template:
    metadata:
      labels:
        app: tts-server
    spec:
      containers:
        - name: tts-server
          image: tts-server:latest
          ports:
            - containerPort: 5002
          resources:
            limits:
              nvidia.com/gpu: 1
              memory: "8Gi"
              cpu: "4"
            requests:
              memory: "4Gi"
              cpu: "2"
          env:
            - name: CUDA_VISIBLE_DEVICES
              value: "0"
          volumeMounts:
            - name: model-storage
              mountPath: /root/.local/share/tts
          livenessProbe:
            httpGet:
              path: /
              port: 5002
            # Model loading is slow; give the pod a minute before probing.
            initialDelaySeconds: 60
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /
              port: 5002
            initialDelaySeconds: 30
            periodSeconds: 10
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: tts-models-pvc
      nodeSelector:
        accelerator: nvidia-gpu
Service 配置 #
yaml
# Service exposing the TTS pods behind a cloud load balancer on port 80.
# NOTE(review): indentation restored; the extracted source was not parseable.
apiVersion: v1
kind: Service
metadata:
  name: tts-service
spec:
  selector:
    app: tts-server
  ports:
    - port: 80
      targetPort: 5002
  type: LoadBalancer
HPA 配置 #
yaml
# HPA: scale 2–10 replicas on CPU (70%) or memory (80%) utilization.
# NOTE(review): indentation restored; the extracted source was not parseable.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: tts-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: tts-server
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
性能优化 #
模型优化 #
python
import torch
from TTS.api import TTS
def optimize_for_production(model_name):
    """Load a Coqui TTS model and apply inference-time optimizations.

    Args:
        model_name: Coqui TTS model id, e.g. "tts_models/en/ljspeech/vits".

    Returns:
        The underlying acoustic model in eval mode, half precision, and
        (when possible) compiled with TorchScript.
    """
    tts = TTS(model_name)
    model = tts.synthesizer.tts_model

    # 1. Eval mode: disables dropout and batch-norm statistics updates.
    model.eval()

    # 2. Half precision halves memory and speeds up GPU inference.
    # NOTE(review): fp16 on CPU is typically slower — confirm GPU deployment.
    model = model.half()

    # 3. TorchScript compilation. Not every TTS architecture is scriptable,
    # so fall back to the eager model rather than crashing at startup.
    try:
        model = torch.jit.script(model)
    except Exception as exc:
        print(f"TorchScript compilation failed, using eager model: {exc}")

    # 4. Release GPU memory cached during model loading.
    torch.cuda.empty_cache()
    return model


# Use the optimized model
optimized_model = optimize_for_production("tts_models/en/ljspeech/vits")
推理优化 #
python
import torch
from TTS.api import TTS
from typing import List
import asyncio
from concurrent.futures import ThreadPoolExecutor
class OptimizedTTS:
    """TTS wrapper adding chunked batch synthesis and async execution."""

    def __init__(self, model_name, batch_size=4):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tts = TTS(model_name).to(self.device)
        self.batch_size = batch_size
        # Bounded pool so concurrent requests do not oversubscribe the GPU.
        self.executor = ThreadPoolExecutor(max_workers=4)

    def synthesize_batch(self, texts: List[str]) -> List:
        """Synthesize ``texts`` in groups of ``batch_size``.

        NOTE(review): each text is still synthesized one at a time; the
        grouping bounds peak memory but is not true batched inference.
        """
        results = []
        for start in range(0, len(texts), self.batch_size):
            chunk = texts[start:start + self.batch_size]
            results.extend(self.tts.tts(text=text) for text in chunk)
        return results

    async def synthesize_async(self, text: str):
        """Run synthesis on the thread pool without blocking the event loop."""
        # get_running_loop() is the supported call inside a coroutine;
        # get_event_loop() is deprecated for this use since Python 3.10.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self.executor, self.tts.tts, text)
缓存优化 #
python
import redis
import hashlib
import json
import base64
import soundfile as sf
import io
class TTSCache:
    """Redis-backed cache for synthesized audio, keyed by text + parameters."""

    def __init__(self, redis_host="localhost", redis_port=6379):
        self.redis = redis.Redis(
            host=redis_host,
            port=redis_port,
            decode_responses=False,  # audio payloads are binary
        )
        self.ttl = 3600  # entries expire after 1 hour

    def _get_key(self, text, **kwargs):
        """Deterministic cache key from the text and synthesis options."""
        # sort_keys makes the key independent of kwarg ordering, so
        # get(text, a=1, b=2) and get(text, b=2, a=1) hit the same entry.
        data = {"text": text, **kwargs}
        return hashlib.md5(json.dumps(data, sort_keys=True).encode()).hexdigest()

    def get(self, text, **kwargs):
        """Return cached audio bytes for this request, or None on a miss."""
        cached = self.redis.get(self._get_key(text, **kwargs))
        if cached:
            return base64.b64decode(cached)
        return None

    def set(self, text, audio, **kwargs):
        """Store raw audio bytes (base64-encoded) with the configured TTL."""
        key = self._get_key(text, **kwargs)
        audio_base64 = base64.b64encode(audio).decode()
        self.redis.setex(key, self.ttl, audio_base64)

    def clear(self):
        """Drop every cached entry in the current Redis database."""
        self.redis.flushdb()
# Using the cache in the API layer
cache = TTSCache()


def synthesize_with_cache(text):
    """Return WAV bytes for ``text``, serving repeated requests from Redis.

    NOTE(review): relies on a module-level ``tts`` instance created
    elsewhere in the application — confirm it is initialized before use.
    """
    cached = cache.get(text)
    if cached:
        return cached

    # Cache miss: synthesize, encode to WAV bytes in memory, then store.
    wav = tts.tts(text=text)
    buffer = io.BytesIO()
    sf.write(buffer, wav, 22050, format="WAV")
    audio_bytes = buffer.getvalue()
    cache.set(text, audio_bytes)
    return audio_bytes
监控告警 #
Prometheus 指标 #
python
from prometheus_client import Counter, Histogram, Gauge, start_http_server
import time
# Metric definitions
# Total requests, labelled by outcome ("success" / "error").
REQUEST_COUNT = Counter(
'tts_requests_total',
'Total TTS requests',
['status']
)
# End-to-end request latency distribution.
REQUEST_LATENCY = Histogram(
'tts_request_latency_seconds',
'Request latency in seconds',
buckets=[0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
)
# Requests currently being processed.
ACTIVE_REQUESTS = Gauge(
'tts_active_requests',
'Number of active requests'
)
# Duration of the generated audio clips.
AUDIO_DURATION = Histogram(
'tts_audio_duration_seconds',
'Generated audio duration in seconds',
buckets=[1, 5, 10, 30, 60, 120]
)
# Expose the metrics endpoint on a separate port for Prometheus to scrape
start_http_server(9090)
# Instrumented API endpoint
@app.get("/api/tts")
@REQUEST_LATENCY.time()
async def synthesize(text: str):
    """Synthesize ``text`` while recording Prometheus request metrics."""
    ACTIVE_REQUESTS.inc()
    try:
        wav = tts.tts(text=text)
        REQUEST_COUNT.labels(status='success').inc()
        # Clip length in seconds at the model's 22.05 kHz sample rate.
        AUDIO_DURATION.observe(len(wav) / 22050)
        return wav
    except Exception:
        REQUEST_COUNT.labels(status='error').inc()
        # Bare raise preserves the original traceback ("raise e" resets it).
        raise
    finally:
        ACTIVE_REQUESTS.dec()
健康检查 #
python
from fastapi import FastAPI
import torch
from TTS.api import TTS
import time
app = FastAPI()
class HealthChecker:
    """Verifies the TTS model can serve by running a tiny test synthesis."""

    def __init__(self, tts):
        self.tts = tts
        self.last_check = None   # epoch seconds of the last successful check
        self.status = "unknown"  # "unknown" | "healthy" | "unhealthy"

    def check(self):
        """Synthesize a short probe and report latency plus GPU state.

        Returns a dict suitable for a health endpoint; never raises —
        failures are reported as {"status": "unhealthy", "error": ...}.
        """
        try:
            start = time.time()
            self.tts.tts(text="test")  # probe synthesis; output discarded
            latency = time.time() - start

            gpu_available = torch.cuda.is_available()
            gpu_memory = None
            if gpu_available:
                # Currently allocated GPU memory, in GiB.
                gpu_memory = torch.cuda.memory_allocated() / 1024**3

            self.status = "healthy"
            self.last_check = time.time()
            return {
                "status": "healthy",
                "latency": latency,
                "gpu_available": gpu_available,
                "gpu_memory_gb": gpu_memory,
                "last_check": self.last_check,
            }
        except Exception as e:
            self.status = "unhealthy"
            return {
                "status": "unhealthy",
                "error": str(e),
            }
health_checker = None  # initialized once at application startup


@app.on_event("startup")
async def startup():
    """Load the model once at process start and build the health checker."""
    global health_checker
    tts = TTS("tts_models/en/ljspeech/vits")
    health_checker = HealthChecker(tts)


@app.get("/health")
async def health():
    """Liveness: run a full health check and return its report."""
    return health_checker.check()


@app.get("/ready")
async def ready():
    """Readiness: 200 only after a successful health check."""
    if health_checker and health_checker.status == "healthy":
        return {"status": "ready"}
    # FastAPI does not honor Flask-style "(body, status)" tuple returns —
    # the original returned 200 with the tuple serialized. Use an explicit
    # 503 response so Kubernetes readiness probes actually fail.
    from fastapi.responses import JSONResponse
    return JSONResponse(status_code=503, content={"status": "not ready"})
日志配置 #
python
import logging
import json
from datetime import datetime, timezone


class JSONFormatter(logging.Formatter):
    """Render log records as single-line JSON for log aggregation systems."""

    def format(self, record):
        log_data = {
            # Timezone-aware UTC; datetime.utcnow() is naive and deprecated.
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": record.levelname,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }
        # Optional correlation id, attached via logger(..., extra={...}).
        if hasattr(record, 'request_id'):
            log_data['request_id'] = record.request_id
        if record.exc_info:
            log_data['exception'] = self.formatException(record.exc_info)
        return json.dumps(log_data)
# Configure the "tts" logger to emit JSON lines to stderr
logger = logging.getLogger("tts")
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(JSONFormatter())
logger.addHandler(handler)
# Usage: fields passed via "extra" become top-level JSON keys
logger.info("TTS server started", extra={"request_id": "123"})
高可用配置 #
负载均衡 #
nginx
# nginx.conf — least-connection load balancing across the TTS pods
upstream tts_backend {
least_conn;
server tts-server-1:5002 weight=1;
server tts-server-2:5002 weight=1;
server tts-server-3:5002 weight=1;
}
server {
listen 80;
location / {
proxy_pass http://tts_backend;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# Timeouts (synthesis can take tens of seconds)
proxy_connect_timeout 60s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
# Response buffering
proxy_buffering on;
proxy_buffer_size 4k;
proxy_buffers 8 4k;
}
# Lightweight health endpoint answered by nginx itself
location /health {
access_log off;
return 200 "healthy\n";
}
}
故障恢复 #
python
import time
from functools import wraps
def retry(max_retries=3, delay=1, backoff=2):
    """Decorator: retry the wrapped call on any exception, with backoff.

    Args:
        max_retries: total number of attempts (must be >= 1).
        delay: seconds to sleep after the first failure.
        backoff: multiplier applied to the delay after each failure.

    Raises:
        ValueError: if ``max_retries`` < 1 (the original silently returned
            None in that case).
        The wrapped function's exception, re-raised with its original
        traceback once all attempts are exhausted.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be >= 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == max_retries:
                        # Bare raise keeps the original traceback.
                        raise
                    # Exponential backoff: delay, delay*backoff, ...
                    time.sleep(delay * (backoff ** (attempt - 1)))
        return wrapper
    return decorator
class FailoverTTS:
    """Tries each loaded TTS model in order until one synthesizes successfully."""

    def __init__(self, model_names):
        self.models = []
        for name in model_names:
            try:
                self.models.append(TTS(name))
            except Exception as e:
                # Best-effort loading: a model that fails to load is skipped,
                # not fatal — the remaining models still provide service.
                print(f"Failed to load {name}: {e}")

    @retry(max_retries=3)
    def synthesize(self, text):
        """Synthesize with the first working model.

        The whole fallback sweep is retried up to 3 times by the ``retry``
        decorator before giving up.
        """
        for model in self.models:
            try:
                return model.tts(text=text)
            except Exception as e:
                print(f"Model failed: {e}")
                continue
        raise Exception("All models failed")


# Primary VITS model with a Tacotron2 fallback
failover_tts = FailoverTTS([
    "tts_models/en/ljspeech/vits",
    "tts_models/en/ljspeech/tacotron2-DDC"
])
部署检查清单 #
text
┌─────────────────────────────────────────────────────────────┐
│ 部署检查清单 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 环境准备: │
│ □ GPU 驱动已安装 │
│ □ CUDA/cuDNN 已配置 │
│ □ Docker 已安装 │
│ □ Kubernetes 集群已配置(如适用) │
│ │
│ 模型准备: │
│ □ 模型已下载并测试 │
│ □ 模型文件已备份 │
│ □ 模型大小和内存需求已确认 │
│ │
│ 服务配置: │
│ □ 端口已配置 │
│ □ 超时时间已设置 │
│ □ 并发限制已配置 │
│ □ 缓存已启用 │
│ │
│ 监控告警: │
│ □ 健康检查端点已配置 │
│ □ Prometheus 指标已启用 │
│ □ 日志收集已配置 │
│ □ 告警规则已设置 │
│ │
│ 安全配置: │
│ □ HTTPS 已启用 │
│ □ 认证已配置(如需要) │
│ □ 速率限制已配置 │
│ □ 输入验证已实现 │
│ │
└─────────────────────────────────────────────────────────────┘
总结 #
恭喜你完成了 Coqui TTS 的学习之旅!你现在应该能够:
- 安装和配置 Coqui TTS
- 使用预训练模型进行语音合成
- 实现声音克隆功能
- 训练和微调自定义模型
- 部署生产级 TTS 服务
继续探索和实践,构建更多有趣的语音应用!
最后更新:2026-04-05