#
一、自动化运维概述 #
1.1 自动化运维目标 #
- 减少人工操作
- 提高运维效率
- 降低错误率
- 标准化流程
- 可追溯可审计
1.2 自动化工具分类 #
| 类型 | 工具 |
|---|---|
| 配置管理 | Ansible, Puppet, Chef, SaltStack |
| 容器编排 | Kubernetes, Docker Swarm |
| 持续集成 | Jenkins, GitLab CI, GitHub Actions |
| 监控告警 | Prometheus, Zabbix, Nagios |
| 日志管理 | ELK Stack, Graylog |
二、Shell 脚本自动化 #
2.1 系统初始化脚本 #
bash
#!/bin/bash
# System initialization script for a Debian/Ubuntu host (uses apt, ufw, systemd).
# Run as root. Each step re-applies its setting instead of appending to a file,
# so the script can be run again without piling up duplicate configuration.
set -euo pipefail

if [[ "$(id -u)" -ne 0 ]]; then
  echo "This script must be run as root." >&2
  exit 1
fi

# Set the time zone
timedatectl set-timezone Asia/Shanghai

# Update the system
apt update && apt upgrade -y

# Install common tools
apt install -y vim curl wget git htop iotop iftop

# Configure the firewall (SSH is allowed before ufw is switched on)
ufw allow 22/tcp
ufw allow 80/tcp
ufw allow 443/tcp
ufw --force enable

# Configure SSH
SSHD_CONFIG="/etc/ssh/sshd_config"

# set_sshd_option KEY VALUE
# Forces "KEY VALUE" in sshd_config whether the existing top-level line is
# commented out, already active, or carries another value; appends the
# setting when the key does not appear at all.
set_sshd_option() {
  local key=$1 value=$2
  if grep -Eq "^#?${key}[[:space:]]" "$SSHD_CONFIG"; then
    sed -i -E "s|^#?${key}[[:space:]].*|${key} ${value}|" "$SSHD_CONFIG"
  else
    printf '%s %s\n' "$key" "$value" >> "$SSHD_CONFIG"
  fi
}

# NOTE: password logins are disabled below. Make sure key-based login works
# for a non-root user before running this, or you may lock yourself out.
set_sshd_option PermitRootLogin no
set_sshd_option PasswordAuthentication no

# Validate the edited file before restarting so a bad config cannot take
# the SSH daemon down.
sshd -t

# The unit is named ssh.service on Debian/Ubuntu; fall back to sshd.service.
if systemctl cat ssh.service > /dev/null 2>&1; then
  systemctl restart ssh
else
  systemctl restart sshd
fi

# Configure kernel parameters in a dedicated drop-in file. The file is
# overwritten (not appended to), so re-running leaves exactly one copy.
SYSCTL_FILE="/etc/sysctl.d/99-init-tuning.conf"
cat > "$SYSCTL_FILE" << 'EOF'
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_fin_timeout = 30
EOF
sysctl -p "$SYSCTL_FILE"

echo "System initialization completed!"
2.2 服务部署脚本 #
bash
#!/bin/bash
# Web application deployment script.
# Backs up the current checkout, updates (or initially clones) the code,
# installs dependencies, restarts the service and verifies its health endpoint.
# Exits non-zero as soon as any step fails.
set -euo pipefail

APP_NAME="myapp"
APP_DIR="/opt/$APP_NAME"
BACKUP_DIR="/backup/$APP_NAME"
GIT_REPO="https://github.com/user/myapp.git"

# Create directories
mkdir -p "$APP_DIR" "$BACKUP_DIR"

if [[ -d "$APP_DIR/.git" ]]; then
  # Back up the current version, then pull the new code
  tar -czf "$BACKUP_DIR/backup_$(date +%Y%m%d%H%M%S).tar.gz" -C "$APP_DIR" .
  git -C "$APP_DIR" pull origin main
else
  # First deployment: there is no repository to pull into yet, so clone it
  git clone --branch main "$GIT_REPO" "$APP_DIR"
fi

# Install dependencies
cd "$APP_DIR"
npm install --production

# Restart the service
systemctl restart "$APP_NAME"

# Health check. -f makes curl fail on HTTP 4xx/5xx responses; with -s alone
# an error page would still count as a successful deployment.
sleep 5
if curl -fsS --max-time 10 http://localhost:3000/health > /dev/null; then
  echo "Deployment successful!"
else
  echo "Deployment failed!" >&2
  exit 1
fi
2.3 备份脚本 #
bash
#!/bin/bash
# Database backup script.
# Dumps every database except information_schema and performance_schema to
# $BACKUP_DIR/<db>_<date>.sql.gz and prunes dumps older than 30 days.
# MYSQL_USER / MYSQL_PASS can be supplied through the environment; the
# defaults below are the tutorial placeholders.
set -euo pipefail

DATE=$(date +%Y%m%d)
BACKUP_DIR="/backup/mysql"
MYSQL_USER="${MYSQL_USER:-backup}"
MYSQL_PASS="${MYSQL_PASS:-password}"

# Hand the credentials to the client tools through a private option file
# instead of -p<password>, which exposes the password in the process list.
CREDENTIALS_FILE=$(mktemp)
trap 'rm -f -- "$CREDENTIALS_FILE"' EXIT
chmod 600 "$CREDENTIALS_FILE"
printf '[client]\nuser=%s\npassword="%s"\n' "$MYSQL_USER" "$MYSQL_PASS" > "$CREDENTIALS_FILE"

mkdir -p "$BACKUP_DIR"

# -N drops the "Database" header row, -B prints one plain name per line.
DB_LIST=$(mysql --defaults-extra-file="$CREDENTIALS_FILE" -N -B -e "SHOW DATABASES;")

FAILED=0
while IFS= read -r DB; do
  # Exact-name match, so a database whose name merely contains one of these
  # words is still backed up.
  case "$DB" in
    '' | information_schema | performance_schema) continue ;;
  esac

  TARGET="$BACKUP_DIR/${DB}_${DATE}.sql.gz"
  # Dump to a temporary name first: with pipefail a failed mysqldump is
  # detected, and a partial file never masquerades as a finished backup.
  if mysqldump --defaults-extra-file="$CREDENTIALS_FILE" "$DB" | gzip > "$TARGET.tmp"; then
    mv -- "$TARGET.tmp" "$TARGET"
  else
    echo "Backup of database '$DB' failed" >&2
    rm -f -- "$TARGET.tmp"
    FAILED=1
  fi
done <<< "$DB_LIST"

if (( FAILED )); then
  # Keep old backups when the new run is incomplete
  echo "Backup finished with errors; old backups were not pruned." >&2
  exit 1
fi

# Delete backups older than 30 days
find "$BACKUP_DIR" -type f -name "*.sql.gz" -mtime +30 -delete
echo "Backup completed!"
2.4 监控脚本 #
bash
#!/bin/bash
# System monitoring script.
# Mails $EMAIL when CPU, memory or root-filesystem usage (integer percent)
# exceeds its threshold. Tool output is parsed under LC_ALL=C so the script
# also works on hosts with a non-English locale.
# No "set -e": one failing check or mail must not prevent the other checks.
set -u

CPU_THRESHOLD=80
MEM_THRESHOLD=80
DISK_THRESHOLD=80
EMAIL="admin@example.com"

# check_usage LABEL VALUE THRESHOLD
# Sends "<LABEL> Alert" when VALUE is greater than THRESHOLD. A non-numeric
# VALUE is reported on stderr instead of breaking the comparison.
check_usage() {
  local label=$1 value=$2 threshold=$3
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    echo "Could not determine ${label} usage (got '${value}')" >&2
    return 1
  fi
  if (( value > threshold )); then
    echo "${label} usage is ${value}%" | mail -s "${label} Alert" "$EMAIL"
  fi
}

# read_cpu_times
# Prints "<idle> <total>" jiffies from the aggregate "cpu" line of /proc/stat.
read_cpu_times() {
  local _cpu user nice system idle iowait irq softirq steal _rest
  read -r _cpu user nice system idle iowait irq softirq steal _rest < /proc/stat
  echo "$(( idle + iowait )) $(( user + nice + system + idle + iowait + irq + softirq + ${steal:-0} ))"
}

# CPU check: busy share over a one-second sample. The second column of top's
# "Cpu(s)" line is only the user-space share, so it under-reports total load.
read -r IDLE_1 TOTAL_1 < <(read_cpu_times)
sleep 1
read -r IDLE_2 TOTAL_2 < <(read_cpu_times)
TOTAL_DELTA=$(( TOTAL_2 - TOTAL_1 ))
if (( TOTAL_DELTA > 0 )); then
  CPU_USAGE=$(( 100 * (TOTAL_DELTA - (IDLE_2 - IDLE_1)) / TOTAL_DELTA ))
else
  CPU_USAGE=0
fi
check_usage "CPU" "$CPU_USAGE" "$CPU_THRESHOLD"

# Memory check: used / total from the "Mem:" row of free
MEM_USAGE=$(LC_ALL=C free | awk '/^Mem:/ {print int($3/$2 * 100)}')
check_usage "Memory" "$MEM_USAGE" "$MEM_THRESHOLD"

# Disk check: -P keeps each filesystem on one line even for long device names
DISK_USAGE=$(LC_ALL=C df -P / | awk 'NR == 2 {print int($5)}')
check_usage "Disk" "$DISK_USAGE" "$DISK_THRESHOLD"
三、Ansible 自动化 #
3.1 Ansible 基础 #
bash
# Install Ansible
sudo apt install ansible
# Edit the host inventory
vim /etc/ansible/hosts
# --- contents of /etc/ansible/hosts (inventory file, not shell commands) ---
[webservers]
web1.example.com
web2.example.com
[dbservers]
db1.example.com
# --- end of inventory file ---
# Test the connection to every host in the inventory
ansible all -m ping
# Run ad-hoc commands on all hosts / on the webservers group only
ansible all -a "uptime"
ansible webservers -a "df -h"
3.2 Ansible Playbook #
yaml
# site.yml
# Deploys the web application to the "webservers" group: installs packages,
# copies the application, configures and enables the Nginx site, and makes
# sure the services are running.
---
- name: Deploy web application
  hosts: webservers
  become: yes
  vars:
    app_dir: /opt/myapp
    app_user: www-data
  tasks:
    - name: Install dependencies
      apt:
        name:
          - nginx
          - python3
          - python3-pip
        state: present
        update_cache: yes

    - name: Create application directory
      file:
        path: "{{ app_dir }}"
        state: directory
        owner: "{{ app_user }}"
        group: "{{ app_user }}"

    - name: Copy application files
      copy:
        src: ./app/
        dest: "{{ app_dir }}"
        owner: "{{ app_user }}"
        group: "{{ app_user }}"

    - name: Install Python dependencies
      pip:
        requirements: "{{ app_dir }}/requirements.txt"

    # notify belongs to the task (same level as the module name), not to the
    # template module's arguments.
    - name: Configure Nginx
      template:
        src: nginx.conf.j2
        dest: /etc/nginx/sites-available/myapp
      notify: Restart Nginx

    - name: Enable site
      file:
        src: /etc/nginx/sites-available/myapp
        dest: /etc/nginx/sites-enabled/myapp
        state: link

    # loop is a task keyword as well; each item is substituted into "name".
    - name: Start services
      service:
        name: "{{ item }}"
        state: started
        enabled: yes
      loop:
        - nginx
        - myapp

  handlers:
    - name: Restart Nginx
      service:
        name: nginx
        state: restarted
3.3 执行 Playbook #
bash
# Syntax check only
ansible-playbook site.yml --syntax-check
# Run the playbook
ansible-playbook site.yml
# Restrict the run to a single host
ansible-playbook site.yml --limit web1.example.com
# Check mode (changes are not applied)
ansible-playbook site.yml --check
# Verbose output
ansible-playbook site.yml -v
3.4 Ansible Role #
bash
# 创建 Role
ansible-galaxy init nginx
# Role 结构
nginx/
├── defaults/
│   └── main.yml
├── files/
├── handlers/
│   └── main.yml
├── meta/
│   └── main.yml
├── tasks/
│   └── main.yml
├── templates/
└── vars/
    └── main.yml
# 使用 Role
---
# Applies the "nginx" role to every host in the webservers group.
- name: Install Nginx
  hosts: webservers
  roles:
    - nginx
四、定时任务自动化 #
4.1 crontab 配置 #
bash
# Edit the current user's crontab
crontab -e
# --- crontab entries (fields: minute hour day-of-month month day-of-week command) ---
# Back up every day at 02:00
0 2 * * * /usr/local/bin/backup.sh
# Check services at the start of every hour
0 * * * * /usr/local/bin/check-service.sh
# Clean logs every Monday at 03:00
0 3 * * 1 /usr/local/bin/clean-logs.sh
# Run monitoring every 5 minutes
*/5 * * * * /usr/local/bin/monitor.sh
# --- end of crontab entries ---
# List the current user's scheduled jobs
crontab -l
# Remove the current user's whole crontab (every entry, not a single job)
crontab -r
4.2 systemd.timer 配置 #
bash
# Create the service unit
sudo vim /etc/systemd/system/backup.service
# --- contents of backup.service (unit file, not shell commands) ---
[Unit]
Description=Backup Service
[Service]
Type=oneshot
ExecStart=/usr/local/bin/backup.sh
# Create the timer unit
sudo vim /etc/systemd/system/backup.timer
# --- contents of backup.timer (unit file, not shell commands) ---
[Unit]
Description=Run backup daily at 2am
[Timer]
OnCalendar=*-*-* 02:00:00
Persistent=true
[Install]
WantedBy=timers.target
# Reload the unit files, then enable the timer
sudo systemctl daemon-reload
sudo systemctl enable --now backup.timer
五、监控告警自动化 #
5.1 Prometheus + Grafana #
yaml
# docker-compose.yml
# Prometheus + Grafana + node_exporter monitoring stack.
version: '3'
services:
  prometheus:
    image: prom/prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus

  grafana:
    image: grafana/grafana
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Example value only; replace it before exposing Grafana.
      - GF_SECURITY_ADMIN_PASSWORD=admin

  node_exporter:
    image: prom/node-exporter
    ports:
      - "9100:9100"

# Named volumes referenced by the services above
volumes:
  prometheus_data:
  grafana_data:
5.2 告警配置 #
yaml
# alertmanager.yml
# Sends every alert to the team mailbox; critical alerts have their own route
# entry that points to the same receiver.
global:
  smtp_smarthost: 'smtp.example.com:587'
  smtp_from: 'alert@example.com'
  smtp_auth_username: 'alert@example.com'
  # Example value only; do not commit a real password.
  smtp_auth_password: 'password'

route:
  receiver: 'team-email'
  routes:
    - match:
        severity: critical
      receiver: 'team-email'

receivers:
  - name: 'team-email'
    email_configs:
      - to: 'team@example.com'
5.3 告警规则 #
yaml
# rules.yml
# Alerting rules for CPU, memory and root-filesystem usage. Each condition
# must hold for 5 minutes before the alert fires.
groups:
  - name: system
    rules:
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage"

      - alert: HighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage"

      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100 < 20
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space"
六、CI/CD 自动化 #
6.1 GitLab CI #
yaml
# .gitlab-ci.yml
# Three-stage pipeline: test -> build -> deploy. The deploy job runs only for
# the main branch.
stages:
  - test
  - build
  - deploy

test:
  stage: test
  script:
    - npm install
    - npm test

build:
  stage: build
  script:
    - docker build -t myapp:$CI_COMMIT_SHA .
    - docker push myapp:$CI_COMMIT_SHA

deploy:
  stage: deploy
  script:
    - kubectl set image deployment/myapp myapp=myapp:$CI_COMMIT_SHA
  only:
    - main
6.2 GitHub Actions #
yaml
# .github/workflows/deploy.yml
# Builds and pushes the image, then rolls it out, on every push to main.
name: Deploy
on:
  push:
    branches: [ main ]
jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      # v4 is the current major version of the checkout action.
      - uses: actions/checkout@v4
      # NOTE(review): the push and kubectl steps presumably rely on registry
      # and cluster credentials configured elsewhere - confirm before use.
      - name: Build
        run: |
          docker build -t myapp:${{ github.sha }} .
          docker push myapp:${{ github.sha }}
      - name: Deploy
        run: |
          kubectl set image deployment/myapp myapp=myapp:${{ github.sha }}
七、实践练习 #
7.1 练习一:Shell 脚本 #
bash
# 1. 创建部署脚本
vim deploy.sh
#!/bin/bash
# Minimal deployment script: update the code, build it, restart the service.
# Stop at the first failing step so a failed pull, install or build never
# leads to restarting the service on a half-updated tree.
set -euo pipefail
echo "Deploying application..."
git pull
npm install
npm run build
systemctl restart myapp
echo "Deployment completed!"
# 2. 添加执行权限
chmod +x deploy.sh
# 3. 执行脚本
./deploy.sh
7.2 练习二:Ansible #
bash
# 1. 创建主机清单
vim hosts
[webservers]
# Run against this machine directly. A plain "localhost" entry listed in an
# inventory is treated like any other host and would be reached over SSH.
localhost ansible_connection=local
# 2. 创建 Playbook
vim site.yml
---
# Installs Nginx on every host in the webservers group.
- name: Install Nginx
  hosts: webservers
  become: yes
  tasks:
    - name: Install Nginx
      apt:
        name: nginx
        state: present
# 3. 执行
ansible-playbook -i hosts site.yml
7.3 练习三:定时任务 #
bash
# 1. 创建备份脚本
vim /usr/local/bin/backup.sh
#!/bin/bash
# Archives /var/www/html into /backup/backup_<YYYYMMDD>.tar.gz.
# Exit non-zero on any failure so a broken backup is visible to cron.
set -euo pipefail
# The target directory may not exist yet on a fresh machine.
mkdir -p /backup
tar -czf "/backup/backup_$(date +%Y%m%d).tar.gz" /var/www/html
# 2. 添加执行权限
chmod +x /usr/local/bin/backup.sh
# 3. 配置定时任务
crontab -e
0 2 * * * /usr/local/bin/backup.sh
八、小结 #
本章学习了 Linux 自动化运维的核心技术,包括 Shell 脚本、Ansible、定时任务、监控告警和 CI/CD。
关键要点:
- Shell 脚本是自动化的基础
- Ansible 是强大的配置管理工具
- 定时任务实现周期性自动化
- 监控告警实现主动运维
- CI/CD 实现持续部署
恭喜你完成 Linux 命令大全的学习!
最后更新:2026-03-27