自定义导出器 #

一、开发概述 #

1.1 开发方式 #

text

自定义导出器开发方式：

┌─────────────────────────────────────────────┐
│ 1. 使用客户端库                             │
├─────────────────────────────────────────────┤
│ • 应用内集成                                │
│ • 直接暴露指标                              │
│ • 推荐：长期运行的服务                      │
└─────────────────────────────────────────────┘

┌─────────────────────────────────────────────┐
│ 2. 独立导出器                               │
├─────────────────────────────────────────────┤
│ • 独立程序                                  │
│ • 连接外部系统获取指标                      │
│ • 推荐：第三方系统监控                      │
└─────────────────────────────────────────────┘

┌─────────────────────────────────────────────┐
│ 3. Pushgateway                              │
├─────────────────────────────────────────────┤
│ • 推送指标到Pushgateway                     │
│ • Prometheus从Pushgateway拉取               │
│ • 推荐：短期任务、批处理任务                │
└─────────────────────────────────────────────┘

1.2 客户端库 #

text

官方客户端库：

┌─────────────────────────────────────────────┐
│ 语言      │ 仓库地址                        │
├───────────┼──────────────────────────────────┤
│ Go        │ github.com/prometheus/client_golang │
│ Java      │ github.com/prometheus/client_java │
│ Python    │ github.com/prometheus/client_python │
│ Ruby      │ github.com/prometheus/client_ruby │
│ Rust      │ github.com/prometheus/client_rust │
└─────────────────────────────────────────────┘

社区客户端库：
• .NET、Node.js、PHP、C++ 等

二、Python客户端 #

2.1 安装 #

bash

pip install prometheus-client

2.2 基本示例 #

python

from prometheus_client import Counter, Gauge, Histogram, start_http_server
import random
import time

# Counter: number of simulated HTTP requests, labelled by method,
# endpoint and status code.
REQUEST_COUNT = Counter(
    'http_requests_total',
    'Total HTTP requests',
    ['method', 'endpoint', 'status']
)

# Gauge: current length of a named queue (can go up and down).
QUEUE_LENGTH = Gauge(
    'queue_length',
    'Current queue length',
    ['queue_name']
)

# Histogram: simulated request latency in seconds, with explicit buckets.
REQUEST_LATENCY = Histogram(
    'http_request_duration_seconds',
    'HTTP request latency',
    ['method', 'endpoint'],
    buckets=[0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
)

def simulate_requests():
    """Generate one fake request per second and record it in all three metrics.

    Runs forever; each iteration picks a random method, endpoint and status,
    increments the request counter, sets the 'email' queue gauge to a random
    value in [0, 100] and observes a random latency in [0.01, 2.0] seconds.
    """
    while True:
        # Pick the label values for this simulated request.
        method = random.choice(['GET', 'POST', 'PUT', 'DELETE'])
        endpoint = random.choice(['/api/users', '/api/orders', '/api/products'])
        status = random.choice(['200', '201', '400', '404', '500'])

        # Count the request.
        REQUEST_COUNT.labels(method=method, endpoint=endpoint, status=status).inc()

        # Report the current queue length.
        QUEUE_LENGTH.labels(queue_name='email').set(random.randint(0, 100))

        # Record the request latency.
        latency = random.uniform(0.01, 2.0)
        REQUEST_LATENCY.labels(method=method, endpoint=endpoint).observe(latency)

        time.sleep(1)

if __name__ == '__main__':
    # Expose the metrics over HTTP. The prometheus_client function for this
    # is start_http_server(); the library has no start_server().
    start_http_server(8000)
    print("Exporter started on port 8000")

    # Produce sample data until the process is stopped.
    simulate_requests()

2.3 Flask集成 #

python

from flask import Flask, Response
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
import time

# Flask application hosting both the demo API and the /metrics endpoint.
app = Flask(__name__)

# Request counter, labelled by HTTP method, endpoint and status code.
REQUEST_COUNT = Counter(
    name='http_requests_total',
    documentation='Total HTTP requests',
    labelnames=['method', 'endpoint', 'status'],
)

# Request latency histogram, labelled by HTTP method and endpoint.
# No explicit buckets are passed here.
REQUEST_LATENCY = Histogram(
    name='http_request_duration_seconds',
    documentation='HTTP request latency',
    labelnames=['method', 'endpoint'],
)

@app.route('/metrics')
def metrics():
    """Return the output of generate_latest() with the CONTENT_TYPE_LATEST MIME type."""
    payload = generate_latest()
    return Response(payload, mimetype=CONTENT_TYPE_LATEST)

@app.route('/api/users')
def users():
    """Handle GET /api/users and record its count and latency.

    Returns:
        A dict with an empty 'users' list, which Flask serialises as JSON.
    """
    # perf_counter() is monotonic, so the measured duration cannot go
    # negative or jump if the system clock is adjusted mid-request.
    start_time = time.perf_counter()

    # Business logic (simulated with a short sleep).
    time.sleep(0.1)

    # Record the metrics for this request.
    REQUEST_COUNT.labels(method='GET', endpoint='/api/users', status='200').inc()
    REQUEST_LATENCY.labels(method='GET', endpoint='/api/users').observe(
        time.perf_counter() - start_time
    )

    return {'users': []}

# Start the Flask application on port 5000 when this file is run as a script.
if __name__ == '__main__':
    app.run(port=5000)

三、Go客户端 #

3.1 安装 #

bash

go get github.com/prometheus/client_golang/prometheus
go get github.com/prometheus/client_golang/prometheus/promhttp

3.2 基本示例 #

go

package main

import (
    "math/rand"
    "net/http"
    "time"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// Metrics exposed by this example exporter.
var (
    // requestCount counts simulated HTTP requests, labelled by method,
    // endpoint and status.
    requestCount = prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "http_requests_total",
            Help: "Total HTTP requests",
        },
        []string{"method", "endpoint", "status"},
    )

    // queueLength reports the current length of a queue, labelled by
    // queue name.
    queueLength = prometheus.NewGaugeVec(
        prometheus.GaugeOpts{
            Name: "queue_length",
            Help: "Current queue length",
        },
        []string{"queue_name"},
    )

    // requestLatency records request latency in seconds using the explicit
    // bucket boundaries below, labelled by method and endpoint.
    requestLatency = prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Name:    "http_request_duration_seconds",
            Help:    "HTTP request latency",
            Buckets: []float64{0.1, 0.5, 1.0, 2.0, 5.0, 10.0},
        },
        []string{"method", "endpoint"},
    )
)

// init registers every metric of this example with the default Prometheus
// registry. MustRegister panics if a registration fails.
func init() {
    prometheus.MustRegister(
        requestCount,
        queueLength,
        requestLatency,
    )
}

// simulateMetrics produces one fake request per second, forever, and records
// it in requestCount, queueLength and requestLatency. It never returns and is
// meant to be run in its own goroutine.
func simulateMetrics() {
    // The candidate label values never change, so they are built once here
    // instead of being reallocated on every loop iteration.
    methods := []string{"GET", "POST", "PUT", "DELETE"}
    endpoints := []string{"/api/users", "/api/orders", "/api/products"}
    statuses := []string{"200", "201", "400", "404", "500"}

    for {
        method := methods[rand.Intn(len(methods))]
        endpoint := endpoints[rand.Intn(len(endpoints))]
        status := statuses[rand.Intn(len(statuses))]

        requestCount.WithLabelValues(method, endpoint, status).Inc()
        // Queue length is a random integer in [0, 100).
        queueLength.WithLabelValues("email").Set(float64(rand.Intn(100)))
        // Latency is a random value in [0, 2) seconds.
        requestLatency.WithLabelValues(method, endpoint).Observe(rand.Float64() * 2)

        time.Sleep(time.Second)
    }
}

// main starts the metric simulation in the background and serves the
// collected metrics on :8000/metrics.
func main() {
    go simulateMetrics()

    http.Handle("/metrics", promhttp.Handler())

    // ListenAndServe only returns on failure (for example when the port is
    // already in use). Surface that error instead of exiting silently.
    if err := http.ListenAndServe(":8000", nil); err != nil {
        panic(err)
    }
}

3.3 HTTP中间件 #

go

package main

import (
    "net/http"
    "strconv"
    "time"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// Metrics recorded by the HTTP middleware.
var (
    // httpRequestsTotal counts handled requests, labelled by method,
    // endpoint and status. The label is named "endpoint" (not "path") so that
    // http_requests_total carries the same label names as in the other
    // examples of this document.
    httpRequestsTotal = prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "http_requests_total",
            Help: "Total HTTP requests",
        },
        []string{"method", "endpoint", "status"},
    )

    // httpRequestDuration records request duration in seconds using the
    // explicit bucket boundaries below, labelled by method and endpoint.
    httpRequestDuration = prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Name:    "http_request_duration_seconds",
            Help:    "HTTP request duration",
            Buckets: []float64{0.1, 0.5, 1.0, 2.0, 5.0},
        },
        []string{"method", "endpoint"},
    )
)

// init registers the middleware metrics with the default Prometheus registry.
// MustRegister panics if a registration fails.
func init() {
    prometheus.MustRegister(
        httpRequestsTotal,
        httpRequestDuration,
    )
}

// statusRecorder wraps an http.ResponseWriter and remembers the status code
// that the wrapped handler sent, so the middleware can report it afterwards.
type statusRecorder struct {
    http.ResponseWriter
    status      int  // final status code; the caller initialises it to 200
    wroteHeader bool // true once a final (non-informational) status was seen
}

// WriteHeader records the first final status code and forwards the call.
// Informational 1xx codes other than 101 may be followed by another
// WriteHeader call, so they are forwarded but not recorded.
func (rec *statusRecorder) WriteHeader(code int) {
    final := code >= 200 || code == http.StatusSwitchingProtocols
    if final && !rec.wroteHeader {
        rec.status = code
        rec.wroteHeader = true
    }
    rec.ResponseWriter.WriteHeader(code)
}

// Write forwards the body. A Write without a preceding WriteHeader implies
// the status the recorder was initialised with, so it is locked in here.
func (rec *statusRecorder) Write(b []byte) (int, error) {
    rec.wroteHeader = true
    return rec.ResponseWriter.Write(b)
}

// Unwrap exposes the underlying writer so http.ResponseController can still
// reach optional interfaces (Flusher, Hijacker, ...) hidden by the wrapper.
func (rec *statusRecorder) Unwrap() http.ResponseWriter {
    return rec.ResponseWriter
}

// prometheusMiddleware wraps next and, for every request, increments
// httpRequestsTotal and observes the handling time in httpRequestDuration.
//
// The status label is the code actually sent by next, not a fixed 200.
//
// NOTE(review): r.URL.Path is used verbatim as a label value. If arbitrary
// paths can reach this middleware, the label cardinality is unbounded;
// consider mapping the path to a route pattern first.
func prometheusMiddleware(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        start := time.Now()

        // Default to 200: that is what net/http sends when the handler
        // never calls WriteHeader explicitly.
        rec := &statusRecorder{ResponseWriter: w, status: http.StatusOK}
        next.ServeHTTP(rec, r)

        duration := time.Since(start).Seconds()
        status := strconv.Itoa(rec.status)

        httpRequestsTotal.WithLabelValues(r.Method, r.URL.Path, status).Inc()
        httpRequestDuration.WithLabelValues(r.Method, r.URL.Path).Observe(duration)
    })
}

// main wires a demo /api/users handler and the /metrics endpoint into one
// mux, wraps the mux in prometheusMiddleware and serves it on :5000.
func main() {
    mux := http.NewServeMux()
    mux.HandleFunc("/api/users", func(w http.ResponseWriter, r *http.Request) {
        // Declare the body as JSON; otherwise net/http sniffs the content
        // and labels it text/plain.
        w.Header().Set("Content-Type", "application/json")
        // A failed write means the client went away; there is nothing
        // useful left to do with the error in this handler.
        _, _ = w.Write([]byte(`{"users": []}`))
    })
    mux.Handle("/metrics", promhttp.Handler())

    // ListenAndServe only returns on failure (for example when the port is
    // already in use). Surface that error instead of exiting silently.
    if err := http.ListenAndServe(":5000", prometheusMiddleware(mux)); err != nil {
        panic(err)
    }
}

四、Java客户端 #

4.1 Maven依赖 #

xml

<dependency>
    <groupId>io.prometheus</groupId>
    <artifactId>simpleclient</artifactId>
    <version>0.16.0</version>
</dependency>
<dependency>
    <groupId>io.prometheus</groupId>
    <artifactId>simpleclient_httpserver</artifactId>
    <version>0.16.0</version>
</dependency>

4.2 基本示例 #

java

import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;
import io.prometheus.client.Histogram;
import io.prometheus.client.exporter.HTTPServer;

/**
 * Minimal exporter: registers a counter, a gauge and a histogram, starts an
 * HTTP server on port 8000 and then records one sample per second forever.
 */
public class ExporterExample {
    /** Request counter, labelled by method, endpoint and status. */
    static final Counter requestCount = Counter.build()
            .name("http_requests_total")
            .help("Total HTTP requests")
            .labelNames("method", "endpoint", "status")
            .register();

    /** Current queue length, labelled by queue name. */
    static final Gauge queueLength = Gauge.build()
            .name("queue_length")
            .help("Current queue length")
            .labelNames("queue_name")
            .register();

    /** Request latency, labelled by method and endpoint. */
    static final Histogram requestLatency = Histogram.build()
            .name("http_request_duration_seconds")
            .help("HTTP request latency")
            .labelNames("method", "endpoint")
            .register();

    /**
     * Starts the HTTP server and feeds the metrics in an endless loop.
     *
     * @param args unused
     * @throws Exception if the server cannot be started or the sleep is interrupted
     */
    public static void main(String[] args) throws Exception {
        HTTPServer metricsServer = new HTTPServer(8000);

        // Fixed label values used for every simulated request.
        final String verb = "GET";
        final String route = "/api/users";
        final String code = "200";

        for (;;) {
            requestCount.labels(verb, route, code).inc();

            double queued = Math.random() * 100;
            queueLength.labels("email").set(queued);

            double seconds = Math.random() * 2;
            requestLatency.labels(verb, route).observe(seconds);

            Thread.sleep(1000);
        }
    }
}

五、最佳实践 #

5.1 命名规范 #

text

指标命名规范：

┌─────────────────────────────────────────────┐
│ 1. 使用snake_case                           │
├─────────────────────────────────────────────┤
│ 好的：http_requests_total                   │
│ 不好的：httpRequestsTotal                   │
├─────────────────────────────────────────────┤
│ 2. 包含单位                                 │
├─────────────────────────────────────────────┤
│ 好的：http_request_duration_seconds         │
│ 不好的：http_request_duration               │
├─────────────────────────────────────────────┤
│ 3. Counter使用_total后缀                    │
├─────────────────────────────────────────────┤
│ 好的：http_requests_total                   │
│ 不好的：http_requests_count                 │
└─────────────────────────────────────────────┘

5.2 标签设计 #

text

标签设计原则：

┌─────────────────────────────────────────────┐
│ 1. 避免高基数                               │
├─────────────────────────────────────────────┤
│ 不要使用：user_id, request_id               │
│ 使用：method, status, endpoint              │
├─────────────────────────────────────────────┤
│ 2. 标签值有限                               │
├─────────────────────────────────────────────┤
│ 好的：status="200", method="GET"            │
│ 不好的：timestamp="1700000000"              │
├─────────────────────────────────────────────┤
│ 3. 保持一致性                               │
├─────────────────────────────────────────────┤
│ 所有服务使用相同的标签名                    │
└─────────────────────────────────────────────┘

六、总结 #

客户端库选择：

语言	客户端库
Go	client_golang
Python	client_python
Java	client_java

开发要点：

要点	说明
命名规范	snake_case，包含单位
标签设计	避免高基数
指标类型	选择合适的类型

下一步，让我们学习告警管理！