生产环境最佳实践 #
生产环境要求 #
核心要求 #
text
┌─────────────────────────────────────────────────────┐
│ 生产环境要求 │
├─────────────────────────────────────────────────────┤
│ │
│ 1. 高可用 - 服务持续可用 │
│ 2. 可扩展 - 支持水平扩展 │
│ 3. 安全性 - 数据和访问安全 │
│ 4. 可监控 - 全面的监控和告警 │
│ 5. 可恢复 - 快速故障恢复 │
│ 6. 可追溯 - 完整的日志和审计 │
│ │
└─────────────────────────────────────────────────────┘
镜像管理 #
镜像版本策略 #
bash
# Semantic-version tags (MAJOR.MINOR.PATCH), one tag per release
myapp:v1.0.0
myapp:v1.0.1
myapp:v1.1.0
# Environment tags: name the target environment rather than a specific release
myapp:dev
myapp:staging
myapp:prod
# Timestamp tag (here in YYYYMMDD-HHMMSS form)
myapp:20240101-120000
镜像仓库策略 #
yaml
# Option A: one registry host per environment
dev: registry.dev.example.com/myapp
staging: registry.staging.example.com/myapp
prod: registry.prod.example.com/myapp
---
# Option B: a single registry host with one project (namespace) per environment.
# Kept in a separate YAML document so the dev/staging/prod keys are not
# duplicated inside one mapping.
dev: registry.example.com/dev/myapp
staging: registry.example.com/staging/myapp
prod: registry.example.com/prod/myapp
镜像安全扫描 #
bash
# CI/CD gate: exit with status 1 when HIGH or CRITICAL vulnerabilities are found,
# which fails the pipeline step.
trivy image --exit-code 1 --severity HIGH,CRITICAL myapp:v1.0
# Periodic scan of every image stored on this host.
# `docker images -q` prints one ID per tag, so `sort -u` removes the repeats
# and each image is scanned only once.
docker images -q | sort -u | while read -r image; do
  trivy image "$image"
done
容器配置 #
资源限制 #
yaml
# docker-compose.yml
# CPU and memory bounds for the `app` service.
services:
  app:
    image: myapp:v1.0
    deploy:
      resources:
        # Upper bound the container is allowed to consume.
        limits:
          cpus: '1'
          memory: 1G
        # Amount set aside for the container when it is scheduled.
        reservations:
          cpus: '0.5'
          memory: 512M
健康检查 #
yaml
# Container health probe for the `app` service.
services:
  app:
    image: myapp:v1.0
    healthcheck:
      # Exec form: runs curl inside the container; -f makes HTTP error
      # responses exit non-zero. The image must ship curl for this to work.
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s      # time between probes
      timeout: 10s       # a probe running longer than this counts as failed
      retries: 3         # consecutive failures before the container is unhealthy
      start_period: 40s  # startup grace period; failures here are not counted
重启策略 #
yaml
# Automatic restart behaviour for the `app` service.
services:
  app:
    image: myapp:v1.0
    # Restart the container automatically unless it was stopped explicitly.
    restart: unless-stopped
    deploy:
      # NOTE(review): `restart` and `deploy.restart_policy` may be honoured by
      # different deployment modes (plain Compose vs. Swarm stack) — confirm
      # which one applies to your runtime.
      restart_policy:
        condition: on-failure  # restart only after a non-zero exit
        delay: 5s              # wait between restart attempts
        max_attempts: 3        # give up after this many attempts
高可用配置 #
多副本部署 #
yaml
# Three replicas of `app`, updated one at a time with automatic rollback.
services:
  app:
    image: myapp:v1.0
    deploy:
      replicas: 3
      update_config:
        parallelism: 1            # update one container at a time
        delay: 10s                # pause between update batches
        failure_action: rollback  # revert if the update fails
      rollback_config:
        parallelism: 1            # roll back one container at a time
负载均衡 #
yaml
# nginx published on port 80 in front of three identical application containers.
# How traffic is distributed is defined in ./nginx.conf, which is not shown here.
services:
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      # Mounted read-only: the container has no reason to modify its own config.
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - app1
      - app2
      - app3
  app1:
    image: myapp:v1.0
  app2:
    image: myapp:v1.0
  app3:
    image: myapp:v1.0
数据库高可用 #
yaml
# Two MySQL 8.0 containers intended as a master/slave pair, each with its own
# named data volume.
services:
  mysql-master:
    image: mysql:8.0
    environment:
      # Read the root password from a secret file instead of hard-coding it
      # in the compose file.
      MYSQL_ROOT_PASSWORD_FILE: /run/secrets/mysql_root_password
    secrets:
      - mysql_root_password
    volumes:
      - mysql-master-data:/var/lib/mysql
  mysql-slave:
    image: mysql:8.0
    environment:
      MYSQL_ROOT_PASSWORD_FILE: /run/secrets/mysql_root_password
      # NOTE(review): MYSQL_MASTER_HOST is not among the variables documented
      # for the official mysql image; replication presumably has to be
      # configured separately (e.g. CHANGE REPLICATION SOURCE TO) — confirm.
      MYSQL_MASTER_HOST: mysql-master
    secrets:
      - mysql_root_password
    volumes:
      - mysql-slave-data:/var/lib/mysql
    depends_on:
      - mysql-master

secrets:
  mysql_root_password:
    # Keep this file out of version control.
    file: ./secrets/mysql_root_password.txt

volumes:
  mysql-master-data:
  mysql-slave-data:
监控告警 #
Prometheus监控 #
yaml
# docker-compose.yml
# Monitoring stack: Prometheus (metrics), Grafana (dashboards), Alertmanager (alert routing).
# NOTE(review): the images below carry no tag, so `latest` is pulled — pin explicit
# versions for production.
services:
  prometheus:
    image: prom/prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      # Keep 30 days of time-series data.
      - '--storage.tsdb.retention.time=30d'
  grafana:
    image: grafana/grafana
    ports:
      - "3000:3000"
    volumes:
      - grafana-data:/var/lib/grafana
    environment:
      # Taken from the shell or .env file; Compose aborts with this message if
      # the variable is unset or empty, so no default password is ever used.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:?GF_SECURITY_ADMIN_PASSWORD must be set}
  alertmanager:
    image: prom/alertmanager
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml

volumes:
  prometheus-data:
  grafana-data:
告警规则 #
yaml
# alert_rules.yml
# Prometheus alerting rules for containers. The metric names are assumed to be
# exported by cAdvisor — confirm against your scrape targets.
groups:
  - name: container_alerts
    rules:
      # Fires when a container has not been seen for more than 60 seconds.
      # container_last_seen is a past timestamp, so it is subtracted from now.
      - alert: ContainerDown
        expr: 'time() - container_last_seen{name!=""} > 60'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Container {{ $labels.name }} is down"
      # Fires when a series' CPU usage rate stays above 0.8 cores for 5 minutes.
      - alert: HighCPU
        expr: 'rate(container_cpu_usage_seconds_total{name!=""}[5m]) > 0.8'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high CPU usage"
      # Fires when memory usage exceeds 90% of the configured limit.
      # `> 0` drops containers with no limit (limit 0), whose ratio would be
      # +Inf and always fire; name!="" keeps {{ $labels.name }} non-empty.
      - alert: HighMemory
        expr: 'container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0) > 0.9'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high memory usage"
日志管理 #
日志配置 #
json
// /etc/docker/daemon.json
{
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "10m",
    "max-file": "5",
    "labels": "app,environment"
  }
}
ELK日志收集 #
yaml
# ELK log pipeline: Elasticsearch (storage), Logstash (ingest), Kibana (UI).
services:
  elasticsearch:
    image: elasticsearch:8.10.0
    environment:
      # NOTE(review): single-node discovery provides no redundancy — use a
      # multi-node cluster if the log store must be highly available.
      - discovery.type=single-node
      # NOTE(review): security (authentication/TLS) is switched off here;
      # enable it before exposing this stack outside a trusted network.
      - xpack.security.enabled=false
    volumes:
      - elasticsearch-data:/usr/share/elasticsearch/data
  logstash:
    image: logstash:8.10.0
    volumes:
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf
  kibana:
    image: kibana:8.10.0
    environment:
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    ports:
      - "5601:5601"

volumes:
  elasticsearch-data:
备份恢复 #
数据备份 #
bash
#!/bin/bash
# backup.sh - archive the mysql-data volume and the Docker configuration into
# $BACKUP_DIR, then delete archives older than 30 days.
#
# Abort on the first failed command, unset variable or failed pipeline stage,
# so a broken backup is never followed by the cleanup step.
set -euo pipefail

DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/backup"
mkdir -p "$BACKUP_DIR"

# Back up the data volume through a throw-away container.
# The volume is mounted read-only so the backup job cannot modify live data.
# NOTE(review): archiving the data directory of a running MySQL server may
# capture an inconsistent snapshot — confirm the database is stopped/locked,
# or use a logical dump instead.
docker run --rm \
  -v mysql-data:/data:ro \
  -v "$BACKUP_DIR":/backup \
  alpine tar czf "/backup/mysql-$DATE.tar.gz" -C /data .

# Back up the Docker configuration. `-C /` with a relative member path stores
# the same etc/docker/... entries without tar's leading-slash warning.
tar czf "$BACKUP_DIR/config-$DATE.tar.gz" -C / etc/docker

# Remove backup archives last modified more than 30 days ago.
find "$BACKUP_DIR" -type f -name "*.tar.gz" -mtime +30 -delete
自动备份 #
bash
# Add with: crontab -e
# Runs the backup script every day at 02:00 and appends both stdout and stderr
# to /var/log/backup.log.
0 2 * * * /path/to/backup.sh >> /var/log/backup.log 2>&1
安全加固 #
网络隔离 #
yaml
# Three bridge networks for tiered isolation.
networks:
  # No `internal` flag, so this network keeps normal external connectivity.
  frontend:
    driver: bridge
  # internal: true creates an externally isolated network.
  backend:
    driver: bridge
    internal: true
  database:
    driver: bridge
    internal: true
访问控制 #
yaml
# Least-privilege settings for the `app` container.
services:
  app:
    image: myapp:v1.0
    # Root filesystem is read-only. Paths the application must write to need
    # an explicit volume or tmpfs mount.
    read_only: true
    # Drop every Linux capability, then add back only the one required.
    cap_drop:
      - ALL
    cap_add:
      # Allows binding to ports below 1024.
      - NET_BIND_SERVICE
    security_opt:
      # Processes cannot gain additional privileges (e.g. via setuid binaries).
      - no-new-privileges:true
运维清单 #
部署前检查 #
text
┌─────────────────────────────────────────────────────┐
│ 部署前检查清单 │
├─────────────────────────────────────────────────────┤
│ │
│ [ ] 镜像安全扫描通过 │
│ [ ] 资源限制配置正确 │
│ [ ] 健康检查配置正确 │
│ [ ] 日志配置正确 │
│ [ ] 监控配置正确 │
│ [ ] 告警规则配置 │
│ [ ] 备份策略配置 │
│ [ ] 回滚方案准备 │
│ │
└─────────────────────────────────────────────────────┘
日常运维 #
text
┌─────────────────────────────────────────────────────┐
│ 日常运维清单 │
├─────────────────────────────────────────────────────┤
│ │
│ 每日: │
│ [ ] 检查服务状态 │
│ [ ] 检查资源使用 │
│ [ ] 检查告警信息 │
│ │
│ 每周: │
│ [ ] 检查日志大小 │
│ [ ] 检查备份完整性 │
│ [ ] 检查安全更新 │
│ │
│ 每月: │
│ [ ] 清理旧镜像 │
│ [ ] 清理旧备份 │
│ [ ] 安全审计 │
│ │
└─────────────────────────────────────────────────────┘
小结 #
本节学习了生产环境的最佳实践:
- 镜像管理策略
- 容器配置优化
- 高可用配置
- 监控告警配置
- 日志管理
- 备份恢复
- 安全加固
总结 #
恭喜你完成了《Docker 容器化技术完全指南》的学习!你已经掌握了:
- Docker基础概念和操作
- 镜像管理和Dockerfile编写
- 容器管理和资源限制
- 数据持久化和网络配置
- Docker Compose多容器编排
- 镜像仓库管理
- 高级特性和性能优化
- 生产环境最佳实践
继续实践,不断探索 Docker 的更多可能性!