#

一、自动化运维概述 #

1.1 自动化运维目标 #

  • 减少人工操作
  • 提高运维效率
  • 降低错误率
  • 标准化流程
  • 可追溯可审计

1.2 自动化工具分类 #

类型 工具
配置管理 Ansible, Puppet, Chef, SaltStack
容器编排 Kubernetes, Docker Swarm
持续集成 Jenkins, GitLab CI, GitHub Actions
监控告警 Prometheus, Zabbix, Nagios
日志管理 ELK Stack, Graylog

二、Shell 脚本自动化 #

2.1 系统初始化脚本 #

bash
#!/bin/bash
# System initialization script for Debian/Ubuntu hosts (relies on apt and ufw).
# Run as root. Every step is idempotent, so the script can be re-run safely.
#
# WARNING: password logins over SSH are disabled below. Install and test an
# SSH key for a sudo-capable user first, or you may lock yourself out.

# Abort on the first failed command, on unset variables and on pipeline errors.
set -euo pipefail

if [ "$(id -u)" -ne 0 ]; then
    echo "This script must be run as root." >&2
    exit 1
fi

# Stop package configuration from prompting during an unattended run.
export DEBIAN_FRONTEND=noninteractive

# Set the time zone
timedatectl set-timezone Asia/Shanghai

# Update the system (apt-get instead of apt: apt's CLI is not meant for scripts)
apt-get update
apt-get upgrade -y

# Install common tools; ufw is included because it is configured below
apt-get install -y vim curl wget git htop iotop iftop ufw

# Configure the firewall (SSH is allowed before the firewall is enabled)
ufw allow 22/tcp
ufw allow 80/tcp
ufw allow 443/tcp
ufw --force enable

# Configure SSH
SSHD_CONFIG="/etc/ssh/sshd_config"

# Keep a pristine copy the first time the script runs.
if [ ! -e "${SSHD_CONFIG}.orig" ]; then
    cp -p "$SSHD_CONFIG" "${SSHD_CONFIG}.orig"
fi

# Set "<key> <value>" in sshd_config whether the existing line is commented
# out, active, or carries a different value; append the option if it is absent.
set_sshd_option() {
    local key=$1 value=$2
    if grep -Eq "^[[:space:]]*#?${key}[[:space:]]" "$SSHD_CONFIG"; then
        sed -i -E "s/^[[:space:]]*#?${key}[[:space:]].*/${key} ${value}/" "$SSHD_CONFIG"
    else
        printf '%s %s\n' "$key" "$value" >> "$SSHD_CONFIG"
    fi
}

set_sshd_option PermitRootLogin no
set_sshd_option PasswordAuthentication no
# NOTE(review): files under /etc/ssh/sshd_config.d/ may override these values
# on hosts whose sshd_config has an Include line - confirm on the target image.

# Validate the configuration before restarting so a typo cannot take SSH down.
sshd -t
# On Debian/Ubuntu the OpenSSH server unit is named "ssh".
systemctl restart ssh

# Configure kernel parameters.
# A dedicated drop-in file is overwritten on each run, so re-running the script
# does not append duplicate entries the way ">> /etc/sysctl.conf" would.
SYSCTL_FILE="/etc/sysctl.d/99-system-init.conf"
cat > "$SYSCTL_FILE" << 'EOF'
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_fin_timeout = 30
EOF
sysctl -p "$SYSCTL_FILE"

echo "System initialization completed!"

2.2 服务部署脚本 #

bash
#!/bin/bash
# Web application deployment script.
# Backs up the current checkout, updates it from Git (cloning on the first
# run), installs production dependencies, restarts the systemd service and
# verifies the health endpoint. Any failing step aborts the deployment.

set -euo pipefail

APP_NAME="myapp"
APP_DIR="/opt/$APP_NAME"
BACKUP_DIR="/backup/$APP_NAME"
GIT_REPO="https://github.com/user/myapp.git"
HEALTH_URL="http://localhost:3000/health"
HEALTH_RETRIES=6     # number of health-check attempts
HEALTH_INTERVAL=5    # seconds to wait before each attempt

# Create directories
mkdir -p "$APP_DIR" "$BACKUP_DIR"

if [ -d "$APP_DIR/.git" ]; then
    # Back up the current version (-C archives the directory without cd)
    tar -czf "$BACKUP_DIR/backup_$(date +%Y%m%d%H%M%S).tar.gz" -C "$APP_DIR" .

    # Pull the latest code
    git -C "$APP_DIR" pull origin main
else
    # First deployment: there is no repository to pull into yet, so clone it.
    git clone --branch main "$GIT_REPO" "$APP_DIR"
fi

# Install dependencies
cd "$APP_DIR"
npm install --production

# Restart the service
systemctl restart "$APP_NAME"

# Health check: the service may need a moment to start, so retry a few times.
# -f makes curl fail on HTTP error statuses (e.g. 500) instead of reporting success.
attempt=1
while [ "$attempt" -le "$HEALTH_RETRIES" ]; do
    sleep "$HEALTH_INTERVAL"
    if curl -fs --max-time 5 -o /dev/null "$HEALTH_URL"; then
        echo "Deployment successful!"
        exit 0
    fi
    attempt=$((attempt + 1))
done

echo "Deployment failed!" >&2
exit 1

2.3 备份脚本 #

bash
#!/bin/bash
# MySQL backup script.
# Dumps every non-system database to $BACKUP_DIR/<db>_<YYYYMMDD>.sql.gz and
# removes dumps older than 30 days. Exits non-zero if any dump failed.
#
# MYSQL_USER / MYSQL_PASS may be overridden from the environment.

set -euo pipefail

DATE=$(date +%Y%m%d)
BACKUP_DIR="/backup/mysql"
MYSQL_USER="${MYSQL_USER:-backup}"
MYSQL_PASS="${MYSQL_PASS:-password}"

# Backups and the credentials file must not be readable by other users.
umask 077

# Pass credentials through a private option file instead of -p<password>:
# command-line arguments are visible to every user via ps.
# NOTE: a password containing '"' or '\' needs escaping in this file.
CREDENTIALS_FILE=$(mktemp)
trap 'rm -f -- "$CREDENTIALS_FILE"' EXIT
printf '[client]\nuser=%s\npassword="%s"\n' "$MYSQL_USER" "$MYSQL_PASS" > "$CREDENTIALS_FILE"

mkdir -p "$BACKUP_DIR"

# -N drops the "Database" header row and -B gives plain one-name-per-line
# output, so no grep on the header text is needed.
DATABASES=$(mysql --defaults-extra-file="$CREDENTIALS_FILE" -N -B -e "SHOW DATABASES;")

FAILED=0
while IFS= read -r DB; do
    # Skip the server-generated schemas by exact name, so a user database
    # whose name merely contains one of these words is still backed up.
    case "$DB" in
        '' | information_schema | performance_schema | sys) continue ;;
    esac

    TARGET="$BACKUP_DIR/${DB}_${DATE}.sql.gz"
    # Dump to a temporary name first: with pipefail a mysqldump error fails the
    # pipeline, and a truncated archive never ends up under the final name.
    if mysqldump --defaults-extra-file="$CREDENTIALS_FILE" --single-transaction "$DB" \
            < /dev/null | gzip > "${TARGET}.tmp"; then
        mv -f -- "${TARGET}.tmp" "$TARGET"
    else
        echo "Backup of database '$DB' failed!" >&2
        rm -f -- "${TARGET}.tmp"
        FAILED=1
    fi
done <<< "$DATABASES"

if [ "$FAILED" -ne 0 ]; then
    # Keep the old backups when the new ones are incomplete.
    echo "Backup finished with errors; old backups were not pruned." >&2
    exit 1
fi

# Delete backups older than 30 days
find "$BACKUP_DIR" -type f -name "*.sql.gz" -mtime +30 -delete

echo "Backup completed!"

2.4 监控脚本 #

bash
#!/bin/bash
# System monitoring script.
# Measures CPU, memory and root-filesystem usage and mails an alert to $EMAIL
# for every value above its threshold. Values are read from /proc and
# `df -P`, so the result does not depend on the system locale.

set -uo pipefail

CPU_THRESHOLD=80
MEM_THRESHOLD=80
DISK_THRESHOLD=80
EMAIL="admin@example.com"

# Print "<idle> <total>" CPU jiffies from the aggregate "cpu" line of /proc/stat.
cpu_sample() {
    local _label user nice system idle iowait irq softirq steal _rest
    read -r _label user nice system idle iowait irq softirq steal _rest < /proc/stat || return 1
    echo "$((idle + iowait)) $((user + nice + system + idle + iowait + irq + softirq + steal))"
}

# Print the overall CPU usage (percent) measured over a one-second window.
# Usage is everything that is not idle/iowait, not only user time.
cpu_usage() {
    local idle1 total1 idle2 total2 delta_total delta_idle
    read -r idle1 total1 <<< "$(cpu_sample)"
    sleep 1
    read -r idle2 total2 <<< "$(cpu_sample)"
    delta_total=$((total2 - total1))
    delta_idle=$((idle2 - idle1))
    [ "$delta_total" -gt 0 ] || return 1
    echo $((100 * (delta_total - delta_idle) / delta_total))
}

# Compare a usage value with its threshold and mail an alert when exceeded.
# Arguments: $1 - resource label (used in subject and body)
#            $2 - measured usage in percent
#            $3 - threshold in percent
check_usage() {
    local label=$1 usage=$2 threshold=$3
    case "$usage" in
        '' | *[!0-9]*)
            # A failed measurement must not be mistaken for "below threshold".
            echo "Could not determine $label usage" >&2
            return 1
            ;;
    esac
    if [ "$usage" -gt "$threshold" ]; then
        if ! echo "$label usage is ${usage}%" | mail -s "$label Alert" "$EMAIL"; then
            echo "Failed to send $label alert to $EMAIL" >&2
        fi
    fi
}

# CPU check
CPU_USAGE=$(cpu_usage)
check_usage "CPU" "$CPU_USAGE" "$CPU_THRESHOLD"

# Memory check: share of memory that is not available to applications
MEM_USAGE=$(awk '
    /^MemTotal:/     { total = $2 }
    /^MemAvailable:/ { avail = $2; have_avail = 1 }
    END { if (total > 0 && have_avail) print int((total - avail) / total * 100) }
' /proc/meminfo)
check_usage "Memory" "$MEM_USAGE" "$MEM_THRESHOLD"

# Disk check (-P keeps each filesystem on a single line)
DISK_USAGE=$(df -P / | awk 'NR == 2 { print int($5) }')
check_usage "Disk" "$DISK_USAGE" "$DISK_THRESHOLD"

三、Ansible 自动化 #

3.1 Ansible 基础 #

bash
# Install Ansible
sudo apt install ansible

# Edit the host inventory
vim /etc/ansible/hosts

# Inventory file contents: each [group] header is followed by the hosts
# that belong to that group.
[webservers]
web1.example.com
web2.example.com

[dbservers]
db1.example.com

# Test connectivity to every host in the inventory with the ping module
ansible all -m ping

# Run ad-hoc commands: on all hosts, then only on the webservers group
ansible all -a "uptime"
ansible webservers -a "df -h"

3.2 Ansible Playbook #

yaml
# site.yml
# Deploys the web application to every host in the "webservers" group:
# installs packages, copies the code, configures Nginx and starts the services.
# NOTE: no task here installs a systemd unit for "myapp"; the unit must
# already exist on the target hosts for the last task to succeed.
---
- name: Deploy web application
  hosts: webservers
  become: true
  vars:
    app_dir: /opt/myapp
    app_user: www-data

  tasks:
    - name: Install dependencies
      apt:
        name:
          - nginx
          - python3
          - python3-pip
        state: present
        update_cache: true

    - name: Create application directory
      file:
        path: "{{ app_dir }}"
        state: directory
        owner: "{{ app_user }}"
        group: "{{ app_user }}"
        mode: "0755"

    # The trailing slash on src copies the directory's contents, not the
    # directory itself. Changed files restart the app via the handler.
    - name: Copy application files
      copy:
        src: ./app/
        dest: "{{ app_dir }}"
        owner: "{{ app_user }}"
        group: "{{ app_user }}"
      notify: Restart myapp

    - name: Install Python dependencies
      pip:
        requirements: "{{ app_dir }}/requirements.txt"

    - name: Configure Nginx
      template:
        src: nginx.conf.j2
        dest: /etc/nginx/sites-available/myapp
        mode: "0644"
      notify: Restart Nginx

    # Enabling the site changes what Nginx serves, so it must trigger the
    # handler too; otherwise a newly linked site is not loaded until some
    # later run happens to change the template.
    - name: Enable site
      file:
        src: /etc/nginx/sites-available/myapp
        dest: /etc/nginx/sites-enabled/myapp
        state: link
      notify: Restart Nginx

    - name: Start services
      service:
        name: "{{ item }}"
        state: started
        enabled: true
      loop:
        - nginx
        - myapp

  # Handlers run once at the end of the play, and only when notified.
  handlers:
    - name: Restart Nginx
      service:
        name: nginx
        state: restarted

    - name: Restart myapp
      service:
        name: myapp
        state: restarted

3.3 执行 Playbook #

bash
# Check the playbook syntax without running it
ansible-playbook site.yml --syntax-check

# Run the playbook
ansible-playbook site.yml

# Restrict the run to a single host
ansible-playbook site.yml --limit web1.example.com

# Check mode (dry run: nothing is changed)
ansible-playbook site.yml --check

# Verbose output
ansible-playbook site.yml -v

3.4 Ansible Role #

bash
# Create a role skeleton named "nginx"
ansible-galaxy init nginx

# Role directory layout
nginx/
├── defaults/
│   └── main.yml
├── files/
├── handlers/
│   └── main.yml
├── meta/
│   └── main.yml
├── tasks/
│   └── main.yml
├── templates/
└── vars/
    └── main.yml

# Use the role from a playbook
---
- name: Install Nginx
  hosts: webservers
  roles:
    - nginx

四、定时任务自动化 #

4.1 crontab 配置 #

bash
# Edit the current user's crontab
crontab -e

# Entry format: minute hour day-of-month month day-of-week command

# Back up every day at 02:00
0 2 * * * /usr/local/bin/backup.sh

# Check services at the start of every hour
0 * * * * /usr/local/bin/check-service.sh

# Clean logs every Monday at 03:00
0 3 * * 1 /usr/local/bin/clean-logs.sh

# Run monitoring every 5 minutes
*/5 * * * * /usr/local/bin/monitor.sh

# List the scheduled jobs
crontab -l

# Remove scheduled jobs
# WARNING: -r deletes the user's entire crontab, not a single entry.
crontab -r

4.2 systemd.timer 配置 #

bash
# Create the service unit
sudo vim /etc/systemd/system/backup.service

# Contents of backup.service. There is no [Install] section here: the unit is
# started by backup.timer below rather than being enabled directly.
[Unit]
Description=Backup Service

[Service]
Type=oneshot
ExecStart=/usr/local/bin/backup.sh

# Create the timer unit
sudo vim /etc/systemd/system/backup.timer

# Contents of backup.timer. A timer activates the service with the same base
# name (backup.service) by default.
[Unit]
Description=Run backup daily at 2am

[Timer]
OnCalendar=*-*-* 02:00:00
# Persistent=true: a run missed while the machine was off is caught up once
# the timer is active again.
Persistent=true

[Install]
WantedBy=timers.target

# Reload unit files, then enable the timer and start it immediately
sudo systemctl daemon-reload
sudo systemctl enable --now backup.timer

五、监控告警自动化 #

5.1 Prometheus + Grafana #

yaml
# docker-compose.yml
# Monitoring stack: Prometheus (metrics storage), Grafana (dashboards) and
# node_exporter (host metrics).
version: '3'

services:
  prometheus:
    image: prom/prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus

  grafana:
    image: grafana/grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Set GRAFANA_ADMIN_PASSWORD in the shell or in a .env file next to
      # this file; "admin" is only a fallback for local experiments.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}

  node_exporter:
    image: prom/node-exporter
    restart: unless-stopped
    # Without the host root filesystem and PID namespace the exporter reports
    # the container's own view instead of the host's.
    command:
      - '--path.rootfs=/host'
    pid: host
    volumes:
      - '/:/host:ro,rslave'
    ports:
      - "9100:9100"

volumes:
  prometheus_data:
  grafana_data:

5.2 告警配置 #

yaml
# alertmanager.yml
# Sends every alert by e-mail to the team mailbox.
global:
  smtp_smarthost: 'smtp.example.com:587'
  smtp_from: 'alert@example.com'
  smtp_auth_username: 'alert@example.com'
  # Placeholder value: keep the real password out of version control.
  smtp_auth_password: 'password'

route:
  # Default receiver for alerts that match no child route.
  receiver: 'team-email'
  routes:
    # "matchers" replaces the deprecated "match" key (Alertmanager 0.22+).
    # This route currently targets the same receiver as the default; point it
    # at a pager or on-call receiver to treat critical alerts differently.
    - matchers:
        - severity="critical"
      receiver: 'team-email'

receivers:
  - name: 'team-email'
    email_configs:
      - to: 'team@example.com'

5.3 告警规则 #

yaml
# rules.yml
# Prometheus alerting rules built on node_exporter metrics.
groups:
  - name: system
    rules:
      # Non-idle CPU share, averaged per instance over 5 minutes, is above
      # 80% and has stayed there for 5 minutes.
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage"
      
      # Share of memory that is not available (1 - MemAvailable / MemTotal)
      # has been above 80% for 5 minutes.
      - alert: HighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage"
      
      # Free space on the root filesystem has been below 20% for 5 minutes.
      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100 < 20
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space"

六、CI/CD 自动化 #

6.1 GitLab CI #

yaml
# .gitlab-ci.yml
# Pipeline: run the tests, build and push a Docker image tagged with the
# commit SHA, then roll that image out to Kubernetes (main branch only).
stages:
  - test
  - build
  - deploy

test:
  stage: test
  # Pin an image that provides npm instead of relying on the runner default.
  image: node:20
  script:
    - npm install
    - npm test

build:
  stage: build
  # Building images needs a Docker client plus a Docker daemon; the dind
  # service provides the daemon (the runner must allow privileged mode).
  image: docker:27
  services:
    - docker:27-dind
  variables:
    DOCKER_TLS_CERTDIR: "/certs"
  script:
    # A bare "myapp:<sha>" would be pushed to docker.io/library/myapp and be
    # rejected; use the project's own GitLab container registry and log in first.
    - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
    - docker build -t "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA" .
    - docker push "$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"

deploy:
  stage: deploy
  # NOTE: this job needs an image/runner with kubectl and access to the
  # cluster (for example a KUBECONFIG CI/CD variable); neither is set up here.
  script:
    - kubectl set image deployment/myapp myapp="$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
  only:
    - main

6.2 GitHub Actions #

yaml
# .github/workflows/deploy.yml
# On every push to main: build a Docker image tagged with the commit SHA,
# push it to Docker Hub and roll it out to Kubernetes.
# Required repository secrets: DOCKERHUB_USERNAME, DOCKERHUB_TOKEN.
name: Deploy

on:
  push:
    branches: [ main ]

jobs:
  deploy:
    runs-on: ubuntu-latest
    env:
      # Fully qualified image name shared by the build and deploy steps.
      IMAGE: ${{ secrets.DOCKERHUB_USERNAME }}/myapp:${{ github.sha }}
    steps:
      # v2 runs on a retired Node.js runtime; v4 is the maintained release.
      - uses: actions/checkout@v4

      # docker push fails without authentication, so log in first. The token
      # is passed on stdin to keep it out of the process arguments.
      - name: Log in to Docker Hub
        env:
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        run: |
          echo "$DOCKERHUB_TOKEN" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin

      - name: Build
        run: |
          docker build -t "$IMAGE" .
          docker push "$IMAGE"

      # NOTE: the runner has no cluster credentials by default; provide a
      # kubeconfig (for example from a secret) before this step.
      - name: Deploy
        run: |
          kubectl set image deployment/myapp myapp="$IMAGE"

七、实践练习 #

7.1 练习一:Shell 脚本 #

bash
# 1. Create the deployment script
vim deploy.sh

#!/bin/bash
# Stop at the first failing step, so a failed pull, install or build never
# restarts the service with broken code.
set -euo pipefail
echo "Deploying application..."
git pull
npm install
npm run build
systemctl restart myapp
echo "Deployment completed!"

# 2. Make the script executable
chmod +x deploy.sh

# 3. Run the script
./deploy.sh

7.2 练习二:Ansible #

bash
# 1. Create the inventory
vim hosts
[webservers]
# An explicit "localhost" entry is reached over SSH by default;
# ansible_connection=local runs the tasks directly on this machine instead.
localhost ansible_connection=local

# 2. Create the playbook
vim site.yml
---
- name: Install Nginx
  hosts: webservers
  become: true
  tasks:
    - name: Install Nginx
      apt:
        name: nginx
        state: present

# 3. Run it (add --ask-become-pass if sudo requires a password)
ansible-playbook -i hosts site.yml

7.3 练习三:定时任务 #

bash
# 1. Create the backup script
vim /usr/local/bin/backup.sh
#!/bin/bash
set -euo pipefail
# tar does not create the destination directory, so make sure it exists.
mkdir -p /backup
tar -czf "/backup/backup_$(date +%Y%m%d).tar.gz" /var/www/html

# 2. Make the script executable
chmod +x /usr/local/bin/backup.sh

# 3. Schedule it: every day at 02:00
crontab -e
0 2 * * * /usr/local/bin/backup.sh

八、小结 #

本章学习了 Linux 自动化运维的核心技术,包括 Shell 脚本、Ansible、定时任务、监控告警和 CI/CD。

关键要点:

  1. Shell 脚本是自动化的基础
  2. Ansible 是强大的配置管理工具
  3. 定时任务实现周期性自动化
  4. 监控告警实现主动运维
  5. CI/CD 实现持续部署

恭喜你完成 Linux 命令大全的学习!

最后更新:2026-03-27