性能优化 #

性能优化是高级Go开发者的必备技能。本章介绍性能分析工具和常见优化技巧。

pprof性能分析 #

启用pprof #

package main

import (
    "net/http"
    _ "net/http/pprof"
)

// main serves the default HTTP mux on localhost:6060 in a background
// goroutine and then blocks forever. The blank import of net/http/pprof is
// what registers the /debug/pprof/ handlers on that mux.
func main() {
	go func() {
		// ListenAndServe only returns on failure (for example when the
		// port is already taken). Surface that instead of silently
		// running without a profiling endpoint.
		if err := http.ListenAndServe("localhost:6060", nil); err != nil {
			panic(err)
		}
	}()

	// Block forever so the server goroutine keeps running.
	select {}
}

访问pprof #

bash

go tool pprof http://localhost:6060/debug/pprof/profile
go tool pprof http://localhost:6060/debug/pprof/heap
go tool pprof http://localhost:6060/debug/pprof/goroutine

CPU分析 #

package main

import (
    "os"
    "runtime/pprof"
    "time"
)

// cpuIntensive squares ten million consecutive integers and discards every
// result, giving the CPU profiler something to sample.
func cpuIntensive() {
	const iterations = 10000000
	for n := 0; n < iterations; n++ {
		_ = n * n
	}
}

// main records a CPU profile of cpuIntensive into the file cpu.prof.
func main() {
	f, err := os.Create("cpu.prof")
	if err != nil {
		panic(err)
	}
	// Deferred calls run last-in-first-out, so the profile is stopped
	// before the file is closed.
	defer f.Close()

	if err := pprof.StartCPUProfile(f); err != nil {
		panic(err)
	}
	defer pprof.StopCPUProfile()

	cpuIntensive()
	time.Sleep(time.Second)
}

分析：

bash

go tool pprof cpu.prof
(pprof) top10
(pprof) list cpuIntensive

内存分析 #

package main

import (
    "fmt"
    "runtime"
    "runtime/pprof"
    "time"
)

// allocateMemory creates 100 slices of 10000 ints each, keeps them all in a
// local slice of slices, and then forces a garbage collection.
func allocateMemory() {
	var held [][]int
	for n := 0; n < 100; n++ {
		held = append(held, make([]int, 10000))
	}
	runtime.GC()
}

// main runs allocateMemory and then writes a heap profile to mem.prof.
// Any failure to create, write or close the file is reported on stderr and
// the program exits with status 1.
//
// NOTE(review): this function needs "os" in the snippet's import list; the
// import block that precedes it does not include it.
func main() {
	allocateMemory()

	f, err := os.Create("mem.prof")
	if err != nil {
		fmt.Fprintln(os.Stderr, "create mem.prof:", err)
		os.Exit(1)
	}

	// Close explicitly instead of deferring: os.Exit below would skip
	// deferred calls, and a failed Close can mean a truncated profile.
	writeErr := pprof.WriteHeapProfile(f)
	closeErr := f.Close()
	if writeErr != nil {
		fmt.Fprintln(os.Stderr, "write heap profile:", writeErr)
		os.Exit(1)
	}
	if closeErr != nil {
		fmt.Fprintln(os.Stderr, "close mem.prof:", closeErr)
		os.Exit(1)
	}
}

常见优化技巧 #

1. 切片预分配 #

package main

import "testing"

// BenchmarkSliceWithoutCap measures appending 1000 ints to a slice that
// starts out nil, so no capacity is reserved up front.
func BenchmarkSliceWithoutCap(b *testing.B) {
	const size = 1000
	for n := 0; n < b.N; n++ {
		var nums []int
		for v := 0; v < size; v++ {
			nums = append(nums, v)
		}
	}
}

// BenchmarkSliceWithCap measures appending 1000 ints to a slice whose
// capacity of 1000 is reserved before the first append.
func BenchmarkSliceWithCap(b *testing.B) {
	const size = 1000
	for n := 0; n < b.N; n++ {
		nums := make([]int, 0, size)
		for v := 0; v < size; v++ {
			nums = append(nums, v)
		}
	}
}

2. 映射预分配 #

package main

import "testing"

// BenchmarkMapWithoutCap measures inserting 1000 entries into a map created
// without a size hint.
func BenchmarkMapWithoutCap(b *testing.B) {
	const entries = 1000
	for n := 0; n < b.N; n++ {
		table := make(map[int]int)
		for k := 0; k < entries; k++ {
			table[k] = k
		}
	}
}

// BenchmarkMapWithCap measures inserting 1000 entries into a map created
// with a size hint of 1000.
func BenchmarkMapWithCap(b *testing.B) {
	const entries = 1000
	for n := 0; n < b.N; n++ {
		table := make(map[int]int, entries)
		for k := 0; k < entries; k++ {
			table[k] = k
		}
	}
}

3. 字符串拼接优化 #

package main

import (
    "strings"
    "testing"
)

// BenchmarkConcatPlus measures building a 100-character string by repeated
// += concatenation. This is the deliberately naive side of the comparison,
// so the += is kept on purpose.
func BenchmarkConcatPlus(b *testing.B) {
	const pieces = 100
	for n := 0; n < b.N; n++ {
		var out string
		for k := 0; k < pieces; k++ {
			out += "x"
		}
	}
}

// BenchmarkConcatBuilder measures building a 100-character string with a
// strings.Builder and then materialising the result once.
func BenchmarkConcatBuilder(b *testing.B) {
	const pieces = 100
	for n := 0; n < b.N; n++ {
		var sb strings.Builder
		for k := 0; k < pieces; k++ {
			sb.WriteString("x")
		}
		_ = sb.String()
	}
}

4. 避免不必要的转换 #

package main

import (
	"fmt"
	"strconv"
	"testing"
)

// BenchmarkItoa measures converting integers in the range 0-999 to decimal
// strings with strconv.Itoa.
func BenchmarkItoa(b *testing.B) {
	for i := 0; i < b.N; i++ {
		value := i % 1000
		_ = strconv.Itoa(value)
	}
}

// BenchmarkSprintf measures converting integers in the range 0-999 to
// decimal strings with fmt.Sprintf, as the slower counterpart to
// BenchmarkItoa. Sprintf lives in package fmt; strconv has no such function.
func BenchmarkSprintf(b *testing.B) {
	for i := 0; i < b.N; i++ {
		_ = fmt.Sprintf("%d", i%1000)
	}
}

5. 使用sync.Pool #

package main

import (
    "sync"
    "testing"
)

// pool hands out reusable 1 KiB byte buffers. It stores *[]byte rather than
// []byte: putting a bare slice into the pool's interface-typed slot has to
// box the slice header, which allocates on every Put and undoes the saving
// the pool is meant to demonstrate.
var pool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, 1024)
		return &buf
	},
}

// BenchmarkWithoutPool measures creating a fresh 1 KiB buffer on every
// iteration.
func BenchmarkWithoutPool(b *testing.B) {
	for i := 0; i < b.N; i++ {
		buf := make([]byte, 1024)
		_ = buf
	}
}

// BenchmarkWithPool measures taking a 1 KiB buffer from pool and returning
// it on every iteration.
func BenchmarkWithPool(b *testing.B) {
	for i := 0; i < b.N; i++ {
		bufp := pool.Get().(*[]byte)
		_ = *bufp
		pool.Put(bufp)
	}
}

6. 减少内存逃逸 #

package main

// Data is a struct wrapping a fixed-size array of 1000 ints.
type Data struct {
    values [1000]int
}

// createOnHeap returns a pointer to a new zero-valued Data.
// Because the pointer outlives the call, the value is expected to escape to
// the heap; confirm with the compiler's -gcflags="-m" output.
func createOnHeap() *Data {
	d := new(Data)
	return d
}

// createOnStack returns a zero-valued Data by value, so no pointer to it
// leaves the function.
func createOnStack() Data {
	var d Data
	return d
}

逃逸分析：

bash

go build -gcflags="-m" main.go

7. 使用内联 #

package main

// add returns the sum of a and b. It is kept tiny so that it serves as an
// inlining candidate.
func add(a, b int) int {
	sum := a + b
	return sum
}

// BenchmarkInline measures calling add with each iteration index and its
// successor, discarding the result.
func BenchmarkInline(b *testing.B) {
	for i := 0; i < b.N; i++ {
		next := i + 1
		_ = add(i, next)
	}
}

查看内联决策：

bash

go build -gcflags="-m=2" main.go

内存优化 #

减少内存分配 #

package main

import (
    "encoding/json"
    "testing"
)

// User is the decode target for the JSON benchmarks; its fields map to the
// "id" and "name" JSON keys.
type User struct {
    ID   int    `json:"id"`
    Name string `json:"name"`
}

// userJSON is the fixed JSON document decoded by the benchmarks.
var userJSON = []byte(`{"id":1,"name":"test"}`)

// BenchmarkUnmarshalAlloc measures decoding userJSON into a User that is
// declared afresh on every iteration.
func BenchmarkUnmarshalAlloc(b *testing.B) {
	for i := 0; i < b.N; i++ {
		var user User
		// A decode failure would make the timing meaningless, so stop
		// the benchmark instead of ignoring the error.
		if err := json.Unmarshal(userJSON, &user); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkUnmarshalReuse measures decoding userJSON into a single User
// that is declared once and reused by every iteration.
func BenchmarkUnmarshalReuse(b *testing.B) {
	var user User
	for i := 0; i < b.N; i++ {
		// A decode failure would make the timing meaningless, so stop
		// the benchmark instead of ignoring the error.
		if err := json.Unmarshal(userJSON, &user); err != nil {
			b.Fatal(err)
		}
	}
}

使用值类型 #

package main

import "testing"

// Point is a pair of float64 coordinates.
type Point struct {
	X float64
	Y float64
}

// processValue takes a Point by value and returns X + Y.
func processValue(p Point) float64 {
	total := p.X + p.Y
	return total
}

// processPointer takes a Point by pointer and returns X + Y.
// p is dereferenced without a nil check.
func processPointer(p *Point) float64 {
	total := p.X + p.Y
	return total
}

// BenchmarkValue measures calling processValue with the same Point value
// on every iteration.
func BenchmarkValue(b *testing.B) {
	pt := Point{1.0, 2.0}
	for n := 0; n < b.N; n++ {
		processValue(pt)
	}
}

// BenchmarkPointer measures calling processPointer with the same *Point on
// every iteration.
func BenchmarkPointer(b *testing.B) {
	pt := &Point{1.0, 2.0}
	for n := 0; n < b.N; n++ {
		processPointer(pt)
	}
}

并发优化 #

合理设置GOMAXPROCS #

package main

import (
    "runtime"
    "sync"
)

// parallelWork starts numGoroutines goroutines that each run work once and
// returns after all of them have finished.
func parallelWork(numGoroutines int) {
	var wg sync.WaitGroup
	// Register every worker before any of them starts.
	wg.Add(numGoroutines)

	worker := func() {
		defer wg.Done()
		work()
	}
	for started := 0; started < numGoroutines; started++ {
		go worker()
	}

	wg.Wait()
}

// work sums the integers from 0 up to (but not including) one million as a
// stand-in for CPU-bound work. The total is discarded.
func work() {
	const limit = 1000000
	total := 0
	for n := 0; n < limit; n++ {
		total += n
	}
}

// main sets GOMAXPROCS to the number of logical CPUs and then runs one
// worker goroutine per CPU via parallelWork.
func main() {
	cpus := runtime.NumCPU()
	runtime.GOMAXPROCS(cpus)
	parallelWork(cpus)
}

使用缓冲通道 #

package main

import "testing"

// BenchmarkUnbufferedChannel measures passing b.N ints from a producer
// goroutine to the benchmark goroutine over an unbuffered channel.
func BenchmarkUnbufferedChannel(b *testing.B) {
	ch := make(chan int)

	// The producer owns the channel: it sends every value and then closes.
	producer := func() {
		defer close(ch)
		for v := 0; v < b.N; v++ {
			ch <- v
		}
	}
	go producer()

	// Drain until the producer closes the channel.
	for {
		if _, open := <-ch; !open {
			break
		}
	}
}

// BenchmarkBufferedChannel measures passing b.N ints from a producer
// goroutine to the benchmark goroutine over a channel with a buffer of 100.
func BenchmarkBufferedChannel(b *testing.B) {
	ch := make(chan int, 100)

	// The producer owns the channel: it sends every value and then closes.
	producer := func() {
		defer close(ch)
		for v := 0; v < b.N; v++ {
			ch <- v
		}
	}
	go producer()

	// Drain until the producer closes the channel.
	for {
		if _, open := <-ch; !open {
			break
		}
	}
}

性能分析工具 #

trace工具 #

package main

import (
    "os"
    "runtime/trace"
)

// main records an execution trace of work into the file trace.out.
func main() {
	f, err := os.Create("trace.out")
	if err != nil {
		panic(err)
	}
	// Deferred calls run last-in-first-out, so the trace is stopped
	// before the file is closed.
	defer f.Close()

	if err := trace.Start(f); err != nil {
		panic(err)
	}
	defer trace.Stop()

	work()
}

分析：

bash

go tool trace trace.out

火焰图 #

bash

go tool pprof -http=:8080 cpu.prof

在浏览器中查看火焰图，定位热点函数。

优化检查清单 #

优化项	说明
切片预分配	避免频繁扩容
映射预分配	减少rehash
strings.Builder	高效字符串拼接
sync.Pool	复用对象
值类型	减少堆分配
内联小函数	减少调用开销
缓冲通道	减少阻塞
避免全局变量	减少竞争

小结 #

性能优化原则：

先测量，后优化 - 使用pprof找到真正的瓶颈
避免过早优化 - 代码清晰比微优化更重要
关注热点 - 优化最耗时的部分
持续测量 - 优化后验证效果

记住：过早优化是万恶之源。在代码正确、可读的前提下，再考虑性能优化。