性能优化

Go本身很快，但要写出高性能代码，需要了解一些优化技巧。我刚开始写Go的时候，完全不关心性能问题，只要功能能实现就行。但在生产环境踩了几次坑之后，才明白性能优化的重要性。记住，过早优化是万恶之源，但也不能完全不考虑性能。

性能分析工具

pprof - CPU分析

import (
    "log"
    "net/http"
    _ "net/http/pprof"
)

// main starts the pprof HTTP endpoint in a background goroutine and then
// runs the application.
//
// The blank import of net/http/pprof registers the profiling handlers on
// http.DefaultServeMux, which is what a nil handler serves.
func main() {
    go func() {
        // Bind to localhost only: the profiling endpoints expose process
        // internals and should not be reachable from other hosts.
        // ListenAndServe only returns on failure (for example when the port
        // is already taken), so its error is logged instead of being dropped.
        log.Println(http.ListenAndServe("localhost:6060", nil))
    }()

    // Your application code goes here.
}

访问 http://localhost:6060/debug/pprof/ 查看分析数据。

# 生成CPU分析
go tool pprof "http://localhost:6060/debug/pprof/profile?seconds=30"

# 常用命令
(pprof) top10       # 最耗CPU的10个函数
(pprof) web         # 生成SVG图（需要graphviz）
(pprof) list main   # 查看main函数的每行开销

基准测试分析

# 运行基准测试并生成CPU分析
go test -bench=. -cpuprofile=cpu.prof

# 分析
go tool pprof cpu.prof

内存分析

# 内存分析
go tool pprof http://localhost:6060/debug/pprof/heap

# 查看内存分配
(pprof) top
(pprof) list MyFunction

内存优化

减少内存分配

// ❌ Allocates repeatedly.
//
// concat joins strs in order with no separator. This is the deliberately
// naive version: every step builds a new string out of the accumulated
// prefix plus the next element.
func concat(strs []string) string {
    joined := ""
    for i := 0; i < len(strs); i++ {
        joined = joined + strs[i] // a new string is allocated each time
    }
    return joined
}

// ✅ Uses strings.Builder.
//
// concat joins strs in order with no separator, appending every element to
// a single strings.Builder and converting to a string once at the end.
func concat(strs []string) string {
    var sb strings.Builder
    for i := range strs {
        sb.WriteString(strs[i])
    }
    return sb.String()
}

// ✅✅ Preallocates capacity.
//
// concat joins strs in order with no separator. It makes two passes: the
// first sums the element lengths, the second writes the elements into a
// strings.Builder that was grown to that total up front.
func concat(strs []string) string {
    size := 0
    for i := range strs {
        size += len(strs[i])
    }

    var sb strings.Builder
    sb.Grow(size) // reserve the full output size before writing
    for i := range strs {
        sb.WriteString(strs[i])
    }
    return sb.String()
}

切片预分配

// ❌ Grows dynamically.
//
// process returns a new slice holding every element of items multiplied by
// two. The result starts as an empty, zero-capacity slice and is extended
// with append, so it is never nil even for empty input.
func process(items []int) []int {
    doubled := []int{} // initial capacity is 0
    for i := 0; i < len(items); i++ {
        doubled = append(doubled, 2*items[i])
    }
    return doubled
}

// ✅ Preallocates capacity.
//
// process returns a new slice holding every element of items multiplied by
// two. The result is created with capacity len(items), so the appends below
// never have to grow it.
func process(items []int) []int {
    doubled := make([]int, 0, len(items)) // preallocated
    for i := 0; i < len(items); i++ {
        doubled = append(doubled, 2*items[i])
    }
    return doubled
}

// ✅✅ Assigns by index.
//
// process returns a new slice holding every element of items multiplied by
// two. The result is allocated at its final length and each slot is written
// directly, so append is not involved at all.
func process(items []int) []int {
    doubled := make([]int, len(items))
    for i := range items {
        doubled[i] = 2 * items[i]
    }
    return doubled
}

sync.Pool 复用对象

// maxPooledBufferCap is the largest buffer capacity putBuffer hands back to
// the pool. Bigger buffers are dropped so that a single oversized input
// cannot keep a large allocation alive inside the pool.
const maxPooledBufferCap = 64 << 10 // 64 KiB

// bufferPool recycles *bytes.Buffer values so that process does not have to
// allocate a fresh buffer on every call.
var bufferPool = sync.Pool{
    New: func() any {
        return new(bytes.Buffer)
    },
}

// getBuffer takes a buffer from bufferPool. Buffers are reset before they
// are put back, so the returned buffer is empty.
func getBuffer() *bytes.Buffer {
    return bufferPool.Get().(*bytes.Buffer)
}

// putBuffer resets buf and returns it to bufferPool. Buffers whose capacity
// exceeds maxPooledBufferCap are left untouched and not pooled, so the
// garbage collector can reclaim them. The caller must not use buf afterwards.
func putBuffer(buf *bytes.Buffer) {
    if buf.Cap() > maxPooledBufferCap {
        return
    }
    buf.Reset()
    bufferPool.Put(buf)
}

// process copies data into a pooled buffer and returns the buffer contents
// as a string. bytes.Buffer.String copies the bytes, so the result stays
// valid after the deferred putBuffer recycles the buffer.
func process(data []byte) string {
    buf := getBuffer()
    defer putBuffer(buf)

    buf.Write(data)
    // ... work on the buffered data ...
    return buf.String()
}

避免大对象的值拷贝

// Large objects end up on the heap, but unnecessary copies of them can still
// be avoided.
//
// BigStruct is a value type wrapping a single 1 MiB byte array.
type BigStruct struct {
    data [1024 * 1024]byte  // 1 MiB
}

// ❌ Passing by value copies the argument.
//
// process takes a BigStruct by value, so each call receives its own copy of
// the whole struct. The body is intentionally empty.
func process(b BigStruct) {}

// ✅ Passing a pointer.
//
// process takes a *BigStruct, so only the pointer is passed and the struct
// itself is not copied. The body is intentionally empty.
func process(b *BigStruct) {}

并发优化

减少锁竞争

// ❌ One global lock.
//
// Counter pairs a single mutex with a single integer count. No methods are
// shown for this variant.
type Counter struct {
    mu    sync.Mutex // presumably guards count; no accessor is shown here
    count int
}

// ✅ Sharded locks.
//
// ShardedCounter spreads its count over 256 shards, each with its own mutex,
// so increments for keys that land in different shards do not contend on the
// same lock.
type ShardedCounter struct {
    shards [256]struct {
        mu    sync.Mutex
        count int
    }
}

// Inc adds one to the shard selected by fnv32(key) modulo the shard count.
func (c *ShardedCounter) Inc(key string) {
    s := &c.shards[fnv32(key)%256]
    s.mu.Lock()
    s.count++
    s.mu.Unlock()
}

使用原子操作

// ❌ Uses a lock.
//
// Counter is a 64-bit counter whose updates are serialized by a mutex.
type Counter struct {
    mu    sync.Mutex
    count int64
}

// Inc adds one to the counter while holding the mutex.
func (c *Counter) Inc() {
    c.mu.Lock()
    defer c.mu.Unlock()
    c.count += 1
}

// ✅ Uses atomic operations.
//
// Counter is a 64-bit counter that is safe for concurrent use without a
// mutex. The zero value is ready to use and starts at 0.
//
// The count is stored in an atomic.Int64 rather than a plain int64, so every
// access has to go through an atomic method and the value is always 64-bit
// aligned, including on 32-bit platforms.
type Counter struct {
    count atomic.Int64
}

// Inc atomically adds one to the counter.
func (c *Counter) Inc() {
    c.count.Add(1)
}

// Value atomically loads and returns the current count.
func (c *Counter) Value() int64 {
    return c.count.Load()
}

用channel代替显式锁

// Uses channels instead of a lock.
//
// Counter keeps its count inside a single goroutine (run). Other goroutines
// interact with it only through channels: a send on inc adds one, a receive
// from get yields the current count.
type Counter struct {
    inc  chan struct{} // each value received by run adds one to the count
    get  chan int64    // run offers the current count on this channel
    done chan struct{} // closed by Close to make run return
}

// NewCounter creates a Counter and starts its run goroutine. Call Close when
// the counter is no longer needed, otherwise that goroutine never exits.
func NewCounter() *Counter {
    c := &Counter{
        inc:  make(chan struct{}),
        get:  make(chan int64),
        done: make(chan struct{}),
    }
    go c.run()
    return c
}

// Close stops the run goroutine. It must be called at most once (a second
// call panics on the already-closed channel). After Close, sends on inc and
// receives from get are no longer served and will block.
func (c *Counter) Close() {
    close(c.done)
}

// run owns the count. It loops until done is closed, handling one increment
// or one read per iteration.
func (c *Counter) run() {
    var count int64
    for {
        select {
        case <-c.inc:
            count++
        case c.get <- count:
        case <-c.done:
            return
        }
    }
}

I/O优化

使用缓冲I/O

// ❌ Unbuffered: reads go straight to the file.
// (The error from os.Open is discarded in this snippet.)
file, _ := os.Open("data.txt")
reader := file

// ✅ Buffered: reads go through a bufio.Reader wrapped around the file.
file, _ := os.Open("data.txt")
reader := bufio.NewReader(file)

// The same applies to writes.
// NOTE(review): os.Open returns a read-only file, so `file` here has to be
// one opened for writing (e.g. via os.Create) — confirm against the
// surrounding code. The deferred Flush also drops its error.
writer := bufio.NewWriter(file)
defer writer.Flush()

使用io.Copy

// ❌ Manual copy.
//
// Copies src to dst through a fixed 1 KiB buffer. A Read may return n > 0
// together with a non-nil error (including io.EOF), so the bytes are written
// before the error is examined; otherwise the final chunk could be lost.
buf := make([]byte, 1024)
for {
    n, err := src.Read(buf)
    if n > 0 {
        // Stop on a failed write instead of silently continuing.
        if _, werr := dst.Write(buf[:n]); werr != nil {
            break
        }
    }
    if err != nil {
        break
    }
}

// ✅ Use io.Copy (it has internal optimizations).
// io.Copy returns the number of bytes copied and an error; both are
// discarded in this snippet.
io.Copy(dst, src)

编译优化

逃逸分析

// 查看逃逸分析
go build -gcflags="-m" main.go

// 减少逃逸
// ❌ Returning a pointer causes an escape.
//
// newInt returns a pointer to a new int holding 42. Every call yields a
// distinct pointer.
func newInt() *int {
    value := 42
    ptr := &value // value outlives this call, so it escapes to the heap
    return ptr
}

// ✅ Take a pointer as a parameter.
//
// setInt stores 42 into the int that x points to. The caller owns the
// storage, so this function does not create a value of its own.
// x must not be nil.
func setInt(x *int) {
    *x = 42
}

内联优化

// Small functions get inlined.
//
// add returns a + b. The go:noinline directive below disables inlining for
// this function; it is meant for testing only.
//
//go:noinline
func add(a, b int) int {
    return a + b
}

// 查看内联决策
go build -gcflags="-m" main.go

常见优化技巧

1. 字符串和[]byte转换

// ❌ The standard conversions copy the data.
s := string(bytes)
b := []byte(str)

// ✅ Zero-copy conversion (unsafe, use with care).
//
// StringToBytes returns a byte slice of length len(s) that shares memory
// with s instead of copying it. Strings are immutable, so the returned slice
// must never be written to.
func StringToBytes(s string) []byte {
    data := unsafe.StringData(s)
    return unsafe.Slice(data, len(s))
}

// BytesToString returns a string of length len(b) that shares memory with b
// instead of copying it. Because strings are immutable, b must not be
// modified for as long as the returned string is in use.
func BytesToString(b []byte) string {
    ptr, n := unsafe.SliceData(b), len(b)
    return unsafe.String(ptr, n)
}

2. 避免不必要的类型转换

// ❌ Converts repeatedly.
for _, v := range nums {
    sum += float64(v)  // one conversion per element
}

// ✅ Stay in one type: accumulate as int and convert once at the end.
var sum int
for _, v := range nums {
    sum += v
}
// result is the sum divided by the number of elements, as a float64.
result := float64(sum) / float64(len(nums))

3. 使用值接收者避免解引用

// Small structs work well with value receivers.
//
// Point is a point with integer coordinates.
type Point struct {
    X, Y int
}

// Distance returns the Euclidean distance between p and the origin (0, 0).
//
// The coordinates are converted to float64 before they are squared, so large
// values cannot overflow int during the multiplication.
func (p Point) Distance() float64 {
    x, y := float64(p.X), float64(p.Y)
    return math.Sqrt(x*x + y*y)
}

4. Map优化

// Preallocate capacity.
m := make(map[string]int, 1000)

// Integer keys are faster than string keys:
// map[int]value is faster than map[string]value.

// Avoid looking the same key up twice on a hot path.
// ❌ Two lookups: one for the existence check, one for the value.
if _, ok := m[key]; ok {
    v := m[key]
}

// ✅ One lookup yields both the value and the existence flag.
if v, ok := m[key]; ok {
    // use v
}

基准测试示例

// benchSink receives the result of every benchmark iteration so that the
// compiler cannot discard the measured work as dead code.
var benchSink string

// BenchmarkConcat compares three ways of joining the same four strings, one
// sub-benchmark each: "Plus" (+= in a loop), "Builder" (strings.Builder) and
// "Join" (strings.Join). Every sub-benchmark reports allocations and stores
// its result in benchSink.
func BenchmarkConcat(b *testing.B) {
    strs := []string{"hello", " ", "world", "!"}

    b.Run("Plus", func(b *testing.B) {
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            result := ""
            for _, s := range strs {
                result += s
            }
            benchSink = result
        }
    })

    b.Run("Builder", func(b *testing.B) {
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            var builder strings.Builder
            for _, s := range strs {
                builder.WriteString(s)
            }
            benchSink = builder.String()
        }
    })

    b.Run("Join", func(b *testing.B) {
        b.ReportAllocs()
        for i := 0; i < b.N; i++ {
            benchSink = strings.Join(strs, "")
        }
    })
}

优化原则

1. 先测量，后优化

// 不要猜测，用数据说话
go test -bench=. -benchmem -cpuprofile=cpu.prof
go tool pprof cpu.prof

2. 优化热点

找到20%消耗80%资源的代码，优先优化。

3. 保持代码可读

// ❌ Over-optimized and hard to maintain.
//
// process returns three times x, computed as a shift plus an add.
func process(x int) int {
    doubled := x << 1
    return doubled + x // 3 * x
}

// ✅ Clear code.
//
// process returns three times x, written as the plain multiplication.
func process(x int) int {
    return 3 * x  // the compiler will optimize this
}

4. 避免过早优化

"过早优化是万恶之源" — Donald Knuth

先写正确的代码，再优化性能。

练习

1. 使用pprof分析一个CPU密集型程序
2. 优化一个有大量字符串拼接的函数
3. 使用sync.Pool优化一个频繁分配对象的场景

参考答案

// 2. String concatenation optimization.
//
// BuildReport renders items as text, one "Name:Value" line per item, each
// terminated by a newline. The output is assembled in a strings.Builder that
// is grown up front from a rough size estimate.
func BuildReport(items []Item) string {
    // Size estimate: assume roughly 100 bytes per item.
    const perItem = 100

    var out strings.Builder
    out.Grow(len(items) * perItem)

    for i := range items {
        it := items[i]
        out.WriteString(it.Name)
        out.WriteByte(':')
        out.WriteString(strconv.Itoa(it.Value))
        out.WriteByte('\n')
    }

    return out.String()
}

// 3. Using sync.Pool.
//
// Parser holds a byte buffer that is kept across uses when the Parser is
// recycled through parserPool.
type Parser struct {
    buffer []byte
}

// parserPool recycles *Parser values. A Parser created by the pool starts
// with an empty buffer of capacity 1024.
var parserPool = sync.Pool{
    New: func() any {
        p := new(Parser)
        p.buffer = make([]byte, 0, 1024)
        return p
    },
}

// Parse takes a *Parser from parserPool for the duration of the call and
// hands it back when the function returns.
//
// The parsing itself is elided in this snippet: `result` and the Result type
// are not defined in the code shown here.
func Parse(data []byte) Result {
    p := parserPool.Get().(*Parser)
    defer func() {
        // Truncate to length zero (capacity is kept) before returning the
        // Parser to the pool.
        p.buffer = p.buffer[:0]
        parserPool.Put(p)
    }()
    
    // Process data using p.buffer.
    return result
}

性能优化是个大话题，最后一节学习最佳实践！