Prometheus安装与配置 #
一、安装概述 #
1.1 安装方式 #
text
Prometheus安装方式:
┌─────────────────────────────────────────────┐
│ 1. 二进制安装 │
│ • 直接下载可执行文件 │
│ • 适合Linux服务器 │
│ • 手动配置和管理 │
├─────────────────────────────────────────────┤
│ 2. Docker安装 │
│ • 使用Docker容器 │
│ • 快速部署 │
│ • 易于管理 │
├─────────────────────────────────────────────┤
│ 3. Kubernetes安装 │
│ • 使用Helm或Operator │
│ • 云原生部署 │
│ • 自动化管理 │
├─────────────────────────────────────────────┤
│ 4. 包管理器安装 │
│ • apt/yum/brew等 │
│ • 系统包管理 │
│ • 版本可能较旧 │
└─────────────────────────────────────────────┘
1.2 系统要求 #
text
系统要求:
┌─────────────────────────────────────────────┐
│ 硬件要求 │
├─────────────────────────────────────────────┤
│ CPU: 2核以上 │
│ 内存: 4GB以上(推荐8GB+) │
│ 磁盘: 根据数据保留时间决定 │
│ • 估算: 保留时间(秒) × 每秒采样数 × 每样本约1~2字节 │
│ • 推荐SSD存储 │
├─────────────────────────────────────────────┤
│ 软件要求 │
├─────────────────────────────────────────────┤
│ 操作系统: Linux/macOS/Windows │
│ 网络: 需要访问监控目标 │
│ 端口: 9090(默认) │
└─────────────────────────────────────────────┘
二、二进制安装 #
2.1 下载安装 #
bash
# Download and install the Prometheus release tarball (Linux amd64).
# Change PROM_VERSION to install another release; the URL and the archive
# and directory names below are all derived from it.
PROM_VERSION="2.48.0"
PROM_DIST="prometheus-${PROM_VERSION}.linux-amd64"
cd /tmp
wget "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${PROM_DIST}.tar.gz"
# Unpack the archive
tar xvfz "${PROM_DIST}.tar.gz"
# Move into the install directory. -T (GNU mv) treats the destination as the
# final name, so an existing non-empty /usr/local/prometheus causes an error
# instead of the release being nested inside it.
sudo mv -T "${PROM_DIST}" /usr/local/prometheus
# Create the data directory
sudo mkdir -p /var/lib/prometheus
# Create a system user with no login shell, unless it already exists
id -u prometheus >/dev/null 2>&1 || sudo useradd -rs /bin/false prometheus
sudo chown -R prometheus:prometheus /usr/local/prometheus /var/lib/prometheus
2.2 创建配置文件 #
yaml
# /usr/local/prometheus/prometheus.yml
# Minimal configuration: Prometheus scrapes only itself.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# No Alertmanager targets configured yet
alerting:
  alertmanagers:
    - static_configs:
        - targets: []

# No rule files loaded yet
rule_files: []

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
2.3 创建系统服务 #
bash
# Create the systemd unit file.
# The heredoc delimiter is quoted ('EOF') so the shell writes the text
# verbatim: the trailing backslashes and $MAINPID reach the unit file
# unchanged, where systemd itself handles line continuation and expansion.
sudo tee /etc/systemd/system/prometheus.service >/dev/null <<'EOF'
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/prometheus/prometheus \
  --config.file=/usr/local/prometheus/prometheus.yml \
  --storage.tsdb.path=/var/lib/prometheus \
  --web.console.templates=/usr/local/prometheus/consoles \
  --web.console.libraries=/usr/local/prometheus/console_libraries \
  --web.listen-address=0.0.0.0:9090
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF
# Reload systemd so it picks up the new unit
sudo systemctl daemon-reload
# Start the service
sudo systemctl start prometheus
# Enable start on boot
sudo systemctl enable prometheus
# Show the status (--no-pager prints and returns instead of opening a pager)
sudo systemctl status prometheus --no-pager
2.4 验证安装 #
bash
# Check the service status
sudo systemctl status prometheus --no-pager
# Show the 50 most recent log lines.
# Add -f to follow the log; following blocks until Ctrl+C, so run it last.
sudo journalctl -u prometheus -n 50 --no-pager
# Web UI: open http://localhost:9090 in a browser
# Check the API (-f makes curl exit non-zero on an HTTP error status)
curl -fsS http://localhost:9090/api/v1/status/config
三、Docker安装 #
3.1 基本Docker运行 #
bash
# Create the configuration directory
mkdir -p ~/prometheus
cd ~/prometheus
# Create the configuration file. The delimiter is quoted so the YAML is
# written verbatim; the indentation is significant.
cat > prometheus.yml <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
EOF
# Run the container. The image tag is pinned to the release used elsewhere in
# this guide, and the config file is mounted read-only.
docker run -d \
  --name prometheus \
  -p 9090:9090 \
  -v "$HOME/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro" \
  prom/prometheus:v2.48.0
# Web UI:
# http://localhost:9090
3.2 Docker Compose部署 #
yaml
# docker-compose.yml
# Monitoring stack: Prometheus, node-exporter, Alertmanager and Grafana
# on one shared bridge network.
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:v2.48.0
    container_name: prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./alerts:/etc/prometheus/alerts
      # Named volume keeps the TSDB data across container re-creation
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--web.enable-lifecycle'
    networks:
      - monitoring

  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    # Host filesystems are mounted read-only so the exporter reports host
    # metrics rather than the container's own
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
    networks:
      - monitoring

  alertmanager:
    image: prom/alertmanager:v0.26.0
    container_name: alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
    networks:
      - monitoring

  grafana:
    image: grafana/grafana:10.2.0
    container_name: grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Demo credentials only: change them before exposing Grafana
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
    networks:
      - monitoring

networks:
  monitoring:
    driver: bridge

volumes:
  prometheus_data:
  grafana_data:
3.3 启动Docker Compose #
bash
# Create the working directory
mkdir -p ~/prometheus-stack
cd ~/prometheus-stack
# Create the configuration files
# Create prometheus.yml
# Create alertmanager.yml
# Start the services
docker-compose up -d
# Show service status
docker-compose ps
# Follow the Prometheus logs
docker-compose logs -f prometheus
# Stop the services
docker-compose down
四、Kubernetes安装 #
4.1 使用Helm安装 #
bash
# Add the prometheus-community chart repository
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
# Search for charts
helm search repo prometheus
# Create the namespace
kubectl create namespace monitoring
# Install Prometheus with a 50Gi server volume and 30 days of retention
helm install prometheus prometheus-community/prometheus \
--namespace monitoring \
--set server.persistentVolume.size=50Gi \
--set server.retention=30d
# Check pod status
kubectl get pods -n monitoring
# Forward local port 9090 to port 80 of the prometheus-server service
kubectl port-forward -n monitoring svc/prometheus-server 9090:80
4.2 使用kube-prometheus-stack #
bash
# Install kube-prometheus-stack (recommended)
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
# Create the values file. The delimiter is quoted so the YAML is written
# verbatim; the indentation is significant.
# storageClassName must name a StorageClass that exists in your cluster.
cat > values.yaml <<'EOF'
prometheus:
  prometheusSpec:
    retention: 30d
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: standard
          resources:
            requests:
              storage: 50Gi
alertmanager:
  alertmanagerSpec:
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: standard
          resources:
            requests:
              storage: 10Gi
grafana:
  adminPassword: admin
  persistence:
    enabled: true
    size: 10Gi
EOF
# Install. --create-namespace makes this step work even when the
# "monitoring" namespace has not been created beforehand.
# The release name "prometheus" must not already be in use in this namespace.
helm install prometheus prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --create-namespace \
  -f values.yaml
# Access the services. Each port-forward runs in the foreground until it is
# interrupted, so run the two commands in separate terminals.
kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80
kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090
4.3 手动部署YAML #
yaml
# prometheus-deployment.yaml
# Namespace, RBAC, configuration, Deployment and Service for one Prometheus.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
---
# Identity the Prometheus pod uses to call the Kubernetes API
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
# Read-only access to the objects that kubernetes_sd_configs lists and watches
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources: ["nodes", "services", "endpoints", "pods"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitoring
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
    scrape_configs:
      - job_name: 'prometheus'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          # Keep only the endpoint whose namespace;service;port-name is
          # monitoring;prometheus;web, i.e. the Service defined below
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: monitoring;prometheus;web
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      serviceAccountName: prometheus
      containers:
        - name: prometheus
          image: prom/prometheus:v2.48.0
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus"
            - "--storage.tsdb.retention.time=30d"
          ports:
            - name: web
              containerPort: 9090
          volumeMounts:
            - name: config
              mountPath: /etc/prometheus
            - name: storage
              mountPath: /prometheus
      volumes:
        - name: config
          configMap:
            name: prometheus-config
        # emptyDir is deleted with the pod: metrics do not survive pod
        # re-creation. Use a PersistentVolumeClaim for durable storage.
        - name: storage
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
spec:
  ports:
    # The port name must be "web": the relabel rule above matches on it
    - name: web
      port: 9090
      targetPort: 9090
  selector:
    app: prometheus
bash
# Apply the manifests
kubectl apply -f prometheus-deployment.yaml
# Check pod and service status
kubectl get pods -n monitoring
kubectl get svc -n monitoring
五、配置详解 #
5.1 主配置文件结构 #
yaml
# prometheus.yml: complete configuration example
global:
  # Default scrape interval for all jobs
  scrape_interval: 15s
  # Default rule evaluation interval
  evaluation_interval: 15s
  # Scrape timeout
  scrape_timeout: 10s
  # External labels (used for federation and remote storage)
  external_labels:
    cluster: 'production'
    region: 'cn-east'

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'
      # Or use service discovery instead:
      # kubernetes_sd_configs:
      #   - role: endpoints

# Rule files
rule_files:
  - '/etc/prometheus/alerts/*.yml'
  - '/etc/prometheus/rules/*.yml'

# Remote write configuration
remote_write:
  - url: 'http://remote-storage:9090/api/v1/write'
    queue_config:
      max_samples_per_send: 1000
      max_shards: 200

# Remote read configuration
remote_read:
  - url: 'http://remote-storage:9090/api/v1/read'
    read_recent: true

# Scrape configuration
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
5.2 采集配置详解 #
yaml
# Scrape configuration example.
# One job may combine several discovery mechanisms; they are all shown
# together here to illustrate the available options.
scrape_configs:
  # Basic settings
  - job_name: 'node-exporter'
    # Scrape interval (overrides the global value)
    scrape_interval: 10s
    # Scrape timeout
    scrape_timeout: 5s
    # Metrics path
    metrics_path: /metrics
    # Protocol scheme
    scheme: http
    # Static targets
    static_configs:
      - targets:
          - 'node1:9100'
          - 'node2:9100'
        labels:
          env: 'production'
    # File-based service discovery
    file_sd_configs:
      - files:
          - '/etc/prometheus/targets/*.json'
        refresh_interval: 5m
    # Consul service discovery
    consul_sd_configs:
      - server: 'consul:8500'
        services: ['node-exporter']
    # Kubernetes service discovery
    kubernetes_sd_configs:
      - role: pod
        namespaces:
          names:
            - monitoring
    # Target relabeling (applied before the scrape)
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_label_app]
        target_label: app
      - source_labels: [__address__]
        target_label: instance
    # Metric relabeling (applied to scraped samples): drop every go_* metric
    metric_relabel_configs:
      - source_labels: [__name__]
        regex: 'go_.*'
        action: drop
5.3 告警规则配置 #
yaml
# alerts/rules.yml
groups:
  - name: node_alerts
    interval: 30s
    rules:
      - alert: NodeDown
        expr: up{job="node-exporter"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Node {{ $labels.instance }} is down"
          description: "Node {{ $labels.instance }} has been down for more than 1 minute."
      - alert: HighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage on {{ $labels.instance }}"
          description: "Memory usage is {{ $value }}%"
  - name: application_alerts
    rules:
      - alert: HighErrorRate
        # Both sides are aggregated to the same label set (job). Binary
        # operators match series with identical labels, so without the sum
        # the 5xx series would be divided only by themselves and the ratio
        # would always be 1.
        expr: |
          sum by (job) (rate(http_requests_total{status=~"5.."}[5m]))
            /
          sum by (job) (rate(http_requests_total[5m]))
            > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value | humanizePercentage }}"
六、启动参数 #
6.1 常用启动参数 #
bash
# Commonly used Prometheus startup flags.
# The flags are collected in an array because a comment cannot be placed
# after a backslash line continuation: the comment would end the command and
# every following flag would run as a separate command.
prometheus_args=(
  # Configuration file
  --config.file=/etc/prometheus/prometheus.yml
  # Storage path
  --storage.tsdb.path=/var/lib/prometheus
  # Data retention time
  --storage.tsdb.retention.time=30d
  # Maximum block duration (a time span, not a size)
  --storage.tsdb.max-block-duration=3h
  # Minimum block duration (a time span, not a size)
  --storage.tsdb.min-block-duration=2h
  # Web listen address
  --web.listen-address=0.0.0.0:9090
  # Maximum number of web connections
  --web.max-connections=512
  # Enable the lifecycle API
  --web.enable-lifecycle
  # Enable the admin API
  --web.enable-admin-api
  # External URL
  --web.external-url=http://prometheus.example.com
  # Maximum number of concurrent queries
  --query.max-concurrency=20
  # Query timeout
  --query.timeout=2m
  # Log level
  --log.level=info
  # Log format
  --log.format=logfmt
)
prometheus "${prometheus_args[@]}"
6.2 运行时配置 #
bash
# Hot-reload the configuration (requires --web.enable-lifecycle)
curl -X POST http://localhost:9090/-/reload
# Graceful shutdown
curl -X PUT http://localhost:9090/-/quit
# Show runtime status
curl http://localhost:9090/api/v1/status/runtimeinfo
七、验证安装 #
7.1 检查服务状态 #
bash
# Check the process. pgrep never lists itself, whereas `ps aux | grep`
# also matches its own grep process.
pgrep -a prometheus
# Check the listening port (ss replaces the deprecated netstat)
ss -tlnp | grep ':9090'
# Check the API (-f makes curl exit non-zero on an HTTP error status)
curl -fsS http://localhost:9090/api/v1/status/config
# Check target status
curl -fsS http://localhost:9090/api/v1/targets
# Check health
curl -fsS http://localhost:9090/-/healthy
7.2 Web界面验证 #
text
访问 http://localhost:9090
检查项目:
├── Status → Targets
│ └── 查看采集目标状态
├── Graph
│ └── 执行查询测试
├── Status → Configuration
│ └── 查看配置信息
└── Status → Runtime Information
└── 查看运行时信息
八、总结 #
安装方式对比:
| 方式 | 优点 | 缺点 | 适用场景 |
|---|---|---|---|
| 二进制 | 完全控制、无依赖 | 手动管理 | 生产服务器 |
| Docker | 快速部署、易管理 | 需要Docker | 开发测试 |
| Kubernetes | 自动化、云原生 | 复杂度高 | 云原生环境 |
配置文件要点:
| 配置项 | 说明 |
|---|---|
| global | 全局配置 |
| alerting | 告警管理器 |
| rule_files | 规则文件 |
| scrape_configs | 采集配置 |
下一步,让我们学习Prometheus的基础概念!
最后更新:2026-03-27