性能优化 #
性能优化是高级Go开发者的必备技能。本章介绍性能分析工具和常见优化技巧。
pprof性能分析 #
启用pprof #
go
package main
import (
"net/http"
_ "net/http/pprof"
)
// main serves the handlers registered by the blank net/http/pprof import on
// localhost:6060 from a background goroutine, then blocks forever.
func main() {
	servePprof := func() {
		// A nil handler selects http.DefaultServeMux. The error returned by
		// ListenAndServe is discarded.
		_ = http.ListenAndServe("localhost:6060", nil)
	}
	go servePprof()

	// An empty select never becomes ready, so the process stays alive.
	select {}
}
访问pprof #
bash
go tool pprof http://localhost:6060/debug/pprof/profile
go tool pprof http://localhost:6060/debug/pprof/heap
go tool pprof http://localhost:6060/debug/pprof/goroutine
CPU分析 #
go
package main
import (
"os"
"runtime/pprof"
"time"
)
// cpuIntensive performs ten million integer multiplications and discards every
// result.
func cpuIntensive() {
	const iterations = 10000000
	for n := 0; n < iterations; n++ {
		_ = n * n
	}
}
// main records a CPU profile of cpuIntensive into cpu.prof.
//
// Failures to create the file or to start the profiler abort the program with
// a panic instead of being ignored; otherwise the run would silently leave an
// empty or missing profile.
func main() {
	f, err := os.Create("cpu.prof")
	if err != nil {
		panic(err)
	}
	// Deferred calls run in reverse order, so the profile is stopped and
	// flushed before the file is closed.
	defer f.Close()

	if err := pprof.StartCPUProfile(f); err != nil {
		panic(err)
	}
	defer pprof.StopCPUProfile()

	cpuIntensive()
	time.Sleep(time.Second)
}
分析:
bash
go tool pprof cpu.prof
(pprof) top10
(pprof) list cpuIntensive
内存分析 #
go
package main
import (
	"fmt"
	"os"
	"runtime"
	"runtime/pprof"
	"time"
)
// allocateMemory allocates 100 slices of 10000 ints each, collects them in a
// local slice of slices, and then forces a garbage collection.
func allocateMemory() {
	const (
		chunkCount = 100
		chunkLen   = 10000
	)

	var chunks [][]int
	for n := 0; n < chunkCount; n++ {
		chunks = append(chunks, make([]int, chunkLen))
	}

	runtime.GC()
}
// main runs allocateMemory, writes a heap profile to mem.prof and prints how
// long the whole run took.
//
// Errors from creating, writing and closing the profile file are reported on
// stderr and end the program with exit status 1 instead of being dropped.
func main() {
	start := time.Now()
	allocateMemory()

	f, err := os.Create("mem.prof")
	if err != nil {
		fmt.Fprintln(os.Stderr, "create mem.prof:", err)
		os.Exit(1)
	}
	if err := pprof.WriteHeapProfile(f); err != nil {
		// os.Exit skips deferred calls, so close the file explicitly first.
		f.Close()
		fmt.Fprintln(os.Stderr, "write heap profile:", err)
		os.Exit(1)
	}
	// Close can report a delayed write error, so its result is checked too.
	if err := f.Close(); err != nil {
		fmt.Fprintln(os.Stderr, "close mem.prof:", err)
		os.Exit(1)
	}

	fmt.Printf("heap profile written to mem.prof in %v\n", time.Since(start))
}
常见优化技巧 #
1. 切片预分配 #
go
package main
import "testing"
// BenchmarkSliceWithoutCap measures appending 1000 ints to a slice that starts
// out nil, so the slice has to grow while it is being filled.
func BenchmarkSliceWithoutCap(b *testing.B) {
	const elems = 1000
	for n := 0; n < b.N; n++ {
		var nums []int
		for v := 0; v < elems; v++ {
			nums = append(nums, v)
		}
	}
}
// BenchmarkSliceWithCap measures appending 1000 ints to a slice whose capacity
// of 1000 is reserved up front.
func BenchmarkSliceWithCap(b *testing.B) {
	const elems = 1000
	for n := 0; n < b.N; n++ {
		nums := make([]int, 0, elems)
		for v := 0; v < elems; v++ {
			nums = append(nums, v)
		}
	}
}
2. 映射预分配 #
go
package main
import "testing"
// BenchmarkMapWithoutCap measures inserting 1000 entries into a map created
// without a size hint.
func BenchmarkMapWithoutCap(b *testing.B) {
	const entries = 1000
	for n := 0; n < b.N; n++ {
		table := make(map[int]int)
		for k := 0; k < entries; k++ {
			table[k] = k
		}
	}
}
// BenchmarkMapWithCap measures inserting 1000 entries into a map created with
// a size hint of 1000.
func BenchmarkMapWithCap(b *testing.B) {
	const entries = 1000
	for n := 0; n < b.N; n++ {
		table := make(map[int]int, entries)
		for k := 0; k < entries; k++ {
			table[k] = k
		}
	}
}
3. 字符串拼接优化 #
go
package main
import (
"strings"
"testing"
)
// BenchmarkConcatPlus measures building a 100-character string by repeated
// += concatenation.
func BenchmarkConcatPlus(b *testing.B) {
	const repeats = 100
	for n := 0; n < b.N; n++ {
		var out string
		for k := 0; k < repeats; k++ {
			out += "x"
		}
	}
}
// BenchmarkConcatBuilder measures building a 100-character string through a
// strings.Builder and materialising it with String.
func BenchmarkConcatBuilder(b *testing.B) {
	const repeats = 100
	for n := 0; n < b.N; n++ {
		var sb strings.Builder
		for k := 0; k < repeats; k++ {
			sb.WriteString("x")
		}
		_ = sb.String()
	}
}
4. 避免不必要的转换 #
go
package main
import (
	"fmt"
	"strconv"
	"testing"
)
// BenchmarkItoa measures formatting an int in the range 0..999 with
// strconv.Itoa.
func BenchmarkItoa(b *testing.B) {
	for n := 0; n < b.N; n++ {
		v := n % 1000
		_ = strconv.Itoa(v)
	}
}
// BenchmarkSprintf measures formatting an int in the range 0..999 with
// fmt.Sprintf, as the slower counterpart to BenchmarkItoa.
//
// Sprintf lives in package fmt; package strconv has no such function.
func BenchmarkSprintf(b *testing.B) {
	for i := 0; i < b.N; i++ {
		_ = fmt.Sprintf("%d", i%1000)
	}
}
5. 使用sync.Pool #
go
package main
import (
"sync"
"testing"
)
// pool hands out reusable 1024-byte buffers.
//
// It stores *[]byte rather than []byte. Put takes an interface value, and
// placing a slice header in an interface copies that header to the heap, so
// pooling the slice by value would allocate on every Put and undo the saving
// the pool is meant to demonstrate.
var pool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, 1024)
		return &buf
	},
}

// BenchmarkWithoutPool measures creating a fresh 1024-byte buffer on every
// iteration.
func BenchmarkWithoutPool(b *testing.B) {
	for i := 0; i < b.N; i++ {
		buf := make([]byte, 1024)
		_ = buf
	}
}

// BenchmarkWithPool measures taking a buffer from pool and handing it straight
// back.
func BenchmarkWithPool(b *testing.B) {
	for i := 0; i < b.N; i++ {
		buf := pool.Get().(*[]byte)
		_ = *buf
		pool.Put(buf)
	}
}
6. 减少堆逃逸 #
go
package main
// Data is a struct whose only field is a fixed-size array of 1000 ints.
type Data struct {
values [1000]int
}
// createOnHeap returns a pointer to a zero-valued Data.
//
// The pointer outlives the call, which normally makes the compiler allocate
// the value on the heap; confirm with -gcflags="-m".
func createOnHeap() *Data {
	d := new(Data)
	return d
}
// createOnStack returns a zero-valued Data by value, so no pointer to it
// leaves the function.
func createOnStack() Data {
	var d Data
	return d
}
逃逸分析:
bash
go build -gcflags="-m" main.go
7. 使用内联 #
go
package main

import "testing"
// add returns the sum of a and b.
func add(a, b int) int {
	sum := a + b
	return sum
}
// BenchmarkInline measures calling the small helper add once per iteration
// and discarding the result.
func BenchmarkInline(b *testing.B) {
	for n := 0; n < b.N; n++ {
		next := n + 1
		_ = add(n, next)
	}
}
查看内联决策:
bash
go build -gcflags="-m=2" main.go
内存优化 #
减少内存分配 #
go
package main
import (
"encoding/json"
"testing"
)
// User is the decoding target for the JSON benchmarks; its fields map to the
// JSON keys "id" and "name".
type User struct {
ID int `json:"id"`
Name string `json:"name"`
}
// userJSON is the fixed JSON document decoded by the benchmarks.
var userJSON = []byte(`{"id":1,"name":"test"}`)
// BenchmarkUnmarshalAlloc measures decoding userJSON into a User that is
// declared anew on every iteration.
//
// The decode error is checked so that a failing Unmarshal aborts the benchmark
// instead of being timed as if it had succeeded.
func BenchmarkUnmarshalAlloc(b *testing.B) {
	for i := 0; i < b.N; i++ {
		var user User
		if err := json.Unmarshal(userJSON, &user); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkUnmarshalReuse measures decoding userJSON into a single User that
// is declared once and overwritten on every iteration.
//
// The decode error is checked so that a failing Unmarshal aborts the benchmark
// instead of being timed as if it had succeeded.
func BenchmarkUnmarshalReuse(b *testing.B) {
	var user User
	for i := 0; i < b.N; i++ {
		if err := json.Unmarshal(userJSON, &user); err != nil {
			b.Fatal(err)
		}
	}
}
使用值类型 #
go
package main
import "testing"
// Point is a pair of float64 coordinates.
type Point struct {
X, Y float64
}
// processValue receives a Point by value and returns the sum of its
// coordinates.
func processValue(p Point) float64 {
	sum := p.X + p.Y
	return sum
}
// processPointer receives a pointer to a Point and returns the sum of its
// coordinates.
func processPointer(p *Point) float64 {
	sum := p.X + p.Y
	return sum
}
// BenchmarkValue measures calling processValue with a Point passed by value.
func BenchmarkValue(b *testing.B) {
	pt := Point{X: 1.0, Y: 2.0}
	for n := 0; n < b.N; n++ {
		processValue(pt)
	}
}
// BenchmarkPointer measures calling processPointer with a pointer to a Point.
func BenchmarkPointer(b *testing.B) {
	pt := &Point{X: 1.0, Y: 2.0}
	for n := 0; n < b.N; n++ {
		processPointer(pt)
	}
}
并发优化 #
合理设置GOMAXPROCS #
go
package main
import (
"runtime"
"sync"
)
// parallelWork starts numGoroutines goroutines that each run work once, and
// returns after all of them have finished.
func parallelWork(numGoroutines int) {
	var wg sync.WaitGroup
	// The full count is registered before any goroutine is started.
	wg.Add(numGoroutines)

	worker := func() {
		defer wg.Done()
		work()
	}
	for started := 0; started < numGoroutines; started++ {
		go worker()
	}

	wg.Wait()
}
// work adds up the integers from 0 to 999999 and discards the total.
func work() {
	const limit = 1000000
	total := 0
	for n := 0; n < limit; n++ {
		total += n
	}
}
// main runs one CPU-bound worker per processor the scheduler may use.
//
// runtime.GOMAXPROCS(0) only reads the current limit. The runtime already
// defaults that limit to the number of usable CPUs, so forcing it to
// runtime.NumCPU() is unnecessary and would override a limit set through the
// GOMAXPROCS environment variable.
func main() {
	parallelWork(runtime.GOMAXPROCS(0))
}
使用缓冲通道 #
go
package main
import "testing"
// BenchmarkUnbufferedChannel measures passing b.N ints from a producer
// goroutine to the benchmark goroutine over an unbuffered channel.
func BenchmarkUnbufferedChannel(b *testing.B) {
	values := make(chan int)

	producer := func() {
		// Closing after the last send ends the receiving range loop.
		defer close(values)
		for n := 0; n < b.N; n++ {
			values <- n
		}
	}
	go producer()

	for range values {
	}
}
// BenchmarkBufferedChannel measures passing b.N ints from a producer goroutine
// to the benchmark goroutine over a channel with a buffer of 100.
func BenchmarkBufferedChannel(b *testing.B) {
	values := make(chan int, 100)

	producer := func() {
		// Closing after the last send ends the receiving range loop.
		defer close(values)
		for n := 0; n < b.N; n++ {
			values <- n
		}
	}
	go producer()

	for range values {
	}
}
性能分析工具 #
trace工具 #
go
package main
import (
"os"
"runtime/trace"
)
// main records an execution trace of work into trace.out.
//
// Failures to create the file or to start tracing abort the program with a
// panic instead of being ignored; otherwise the run would silently leave an
// empty or missing trace.
func main() {
	f, err := os.Create("trace.out")
	if err != nil {
		panic(err)
	}
	// Deferred calls run in reverse order, so tracing is stopped and flushed
	// before the file is closed.
	defer f.Close()

	if err := trace.Start(f); err != nil {
		panic(err)
	}
	defer trace.Stop()

	work()
}
分析:
bash
go tool trace trace.out
火焰图 #
bash
go tool pprof -http=:8080 cpu.prof
在浏览器中打开 http://localhost:8080,通过菜单 VIEW → Flame Graph 查看火焰图,定位热点函数。
优化检查清单 #
| 优化项 | 说明 |
|---|---|
| 切片预分配 | 避免频繁扩容 |
| 映射预分配 | 减少rehash |
| strings.Builder | 高效字符串拼接 |
| sync.Pool | 复用对象 |
| 值类型 | 减少堆分配 |
| 内联小函数 | 减少调用开销 |
| 缓冲通道 | 减少阻塞 |
| 避免全局变量 | 减少竞争 |
小结 #
性能优化原则:
- 先测量,后优化 - 使用pprof找到真正的瓶颈
- 避免过早优化 - 代码清晰比微优化更重要
- 关注热点 - 优化最耗时的部分
- 持续测量 - 优化后验证效果
记住:过早优化是万恶之源。在代码正确、可读的前提下,再考虑性能优化。