Kirill Cherniavskiy for GopherCon Singapore 2023
A time delay between the cause and the effect.
(c) Wikipedia
Causes GC to eat 30% of CPU time
import "math/big"
type State struct {
// skipping fields
index *big.Int
}
func (s *state) UpdateIndex(val *big.Int) {
s.index = new(big.Int).Set(val)
}
import "math/big"
type State struct {
// skipping fields
index big.Int
}
func (s *state) UpdateIndex(val *big.Int) {
s.index.Set(val)
}
Which tools did I use for these examples?
$ go build -gcflags '-m' # simple escape analysis
$ go build -gcflags '-m=2' # more verbose analysis
$ go build -gcflags '-l' # disable inlining
$ go build -gcflags '-S' # print assembly listing
$ go tool objdump -s main.main -S example.com > main.go.s
Be careful with interface function parameters — arguments for these parameters are often moved to heap before passing to the callee function.
Minimal reproducible heap escape.
package main
import "fmt"
// main prints a value that lies outside the runtime's small-integer
// cache (0–255), so boxing it for fmt.Println allocates on the heap.
func main() {
	x := 256
	fmt.Println(x)
}
package main
import "fmt"
// main contrasts the two boxing paths: both x1 and x2 "escape" per the
// analysis, but only 256 needs a real heap allocation — 1 is served
// from the runtime's static small-integer table.
func main() {
	x1 := 1   // 0x01: inside the static cache
	x2 := 256 // 0xFF+1: first value outside the cache
	fmt.Println(x1)
	fmt.Println(x2)
}
Escape analysis output.
./main.go:7:13: ... argument does not escape
./main.go:7:14: x1 escapes to heap
./main.go:8:13: ... argument does not escape
./main.go:8:14: x2 escapes to heap
Output of objdump
.
MOVL $0x1, AX
CALL runtime.convT64(SB)
LEAQ 0x6e9b(IP), CX
MOVQ CX, 0x18(SP)
MOVQ AX, 0x20(SP)
LEAQ 0x18(SP), AX
MOVL $0x1, BX
MOVQ BX, CX
NOPL 0(AX)
CALL fmt.Println(SB)
// convT64 boxes a uint64 into an interface data pointer.
// (Quoted from the Go runtime to explain the objdump output above.)
func convT64(val uint64) (x unsafe.Pointer) {
// Small values (0–255, the length of staticuint64s — hence 256 was
// the first value to allocate in the example) reuse a static table.
if val < uint64(len(staticuint64s)) {
x = unsafe.Pointer(&staticuint64s[val])
} else {
// Anything larger gets a fresh 8-byte heap allocation.
x = mallocgc(8, uint64Type, false)
*(*uint64)(x) = val
}
return
}
Only x2
is allocated on heap.
package main
import "fmt"
// main: same program as on the previous slide — 1 comes out of the
// runtime's static uint64 table, 256 is boxed via a heap allocation.
func main() {
	lo, hi := 1, 256 // 0x01, 0xFF+1
	fmt.Println(lo)
	fmt.Println(hi)
}
package main
import "fmt"
// main shows that an untyped constant argument is boxed by pointing at
// static data, while the same value held in a variable goes through
// runtime.convT64 (see the listings below).
func main() {
	fmt.Println(1) // constant: static data, no convT64 call
	var n int = 1
	fmt.Println(n) // variable: boxed via runtime.convT64
}
MOVUPS X15, 0x18(SP)
LEAQ 0x6ea5(IP), DX
MOVQ DX, 0x18(SP)
LEAQ 0x36f71(IP), SI
MOVQ SI, 0x20(SP)
LEAQ 0x18(SP), AX
MOVL $0x1, BX
MOVQ BX, CX
CALL fmt.Println(SB)
MOVUPS X15, 0x18(SP)
MOVL $0x1, AX
CALL runtime.convT64(SB)
LEAQ 0x6e6b(IP), DX
MOVQ DX, 0x18(SP)
MOVQ AX, 0x20(SP)
LEAQ 0x18(SP), AX
MOVL $0x1, BX
MOVQ BX, CX
CALL fmt.Println(SB)
(if we have it and if we need it)
// Inter is a minimal one-method interface, analogous to fmt.Stringer.
type Inter interface {
	Int64() int64
}

// inter64 is the concrete implementation.
type inter64 int64

// Int64 returns the receiver as a plain int64.
func (v inter64) Int64() int64 {
	return int64(v)
}

// toInt calls through the interface; the directive keeps the dynamic
// dispatch visible in the generated code.
//go:noinline
func toInt(i Inter) int64 {
	result := i.Int64()
	return result
}
// Inter is the example interface (unused by toInt64, shown for contrast).
type Inter interface {
	Int64() int64
}

type inter64 int64

func (v inter64) Int64() int64 {
	return int64(v)
}

// toInt64 takes the concrete type, so the call needs no itab lookup
// and the argument travels in a register.
//go:noinline
func toInt64(i inter64) int64 {
	return i.Int64()
}

func main() {
	value := inter64(1)  // MOVL $0x1, AX
	_ = toInt64(value)   // CALL main.toInt64(SB)
}
// Inter constrains the generic parameter below.
type Inter interface {
	Int64() int64
}

type inter64 int64

func (v inter64) Int64() int64 {
	return int64(v)
}

// toIntGeneric is instantiated per GC shape; at run time a dictionary
// argument carries the concrete type information (see listing below).
//go:noinline
func toIntGeneric[T Inter](i T) int64 {
	return i.Int64()
}
LEAQ main..dict.toIntGeneric[main.inter64](SB), AX
MOVL $0x1, BX
CALL main.toIntGeneric[go.shape.int64](SB)
The GC shape of a type means how that type appears to the allocator / garbage collector. It is determined by its size, its required alignment, and which parts of the type contain a pointer.
When a function call with an interface parameter is inlined, it bypasses the virtual-table lookup, and the compiler may avoid moving its arguments to the heap.
//go:noinline
//go:uintptrescapes
// Calc is the example interface for the interface-vs-generics
// dispatch comparison.
// FIX: the original line started with a stray ';' (extraction
// artifact), which is not valid Go.
type Calc interface {
	Add(int) int
}

// calcInt is a stateful accumulator implementing Calc.
type calcInt int

// Add adds n to the accumulated value, stores the sum back into the
// receiver, and returns it. (Redundant int(n) conversion removed.)
func (c *calcInt) Add(n int) (sum int) {
	sum = int(*c) + n
	*c = calcInt(sum)
	return
}
Let’s see the difference between interface parameter vs generic parameter.
func main() {
	var c1 calcInt
	_ = sum(&c1, 1, 2, 3)
}

// sum feeds every value through the Calc interface. Because Add
// accumulates into the receiver, the value returned by the final
// iteration is the running total.
func sum(calc Calc, vals ...int) (sum int) {
	for _, v := range vals {
		sum = calc.Add(v)
	}
	return sum
}
; var c1 calcInt
MOVQ $0x0, 0x18(SP)
; _ = sum(&c1, 1, 2, 3)
MOVUPS X15, 0x70(SP)
MOVUPS X15, 0x78(SP)
MOVQ $0x1, 0x70(SP)
MOVQ $0x2, 0x78(SP)
MOVQ $0x3, 0x80(SP)
; ... range loop jumps
; sum = calc.Add(val)
LEAQ 0x18(SP), AX
CALL main.(*calcInt).Add(SB)
; ... the rest
func main() {
	// Named c1g to match the annotated listing that follows.
	var c1g calcInt
	_ = sumGeneric(&c1g, 1, 2, 3)
}

// sumGeneric is the generic counterpart of sum: dispatch goes through
// the shape dictionary, and the listing below shows the receiver being
// heap-allocated via runtime.newobject.
func sumGeneric[T Calc](calc T, vals ...int) (sum int) {
	for _, v := range vals {
		sum = calc.Add(v)
	}
	return sum
}
; var c1g calcInt
LEAQ 0x4b86(IP), AX
CALL runtime.newobject(SB)
MOVQ AX, 0x90(SP)
; _ = sumGeneric(&c1g, 1, 2, 3)
; ... range loop jumps
; sum = calc.Add(val)
LEAQ main..dict.sumGeneric[*main.calcInt](SB), DX
LEAQ 0xfffffef6(IP), SI
CALL SI
Type | Allocs | Dyn. dispatch | Inline |
---|---|---|---|
Exact | No | No | Yes |
Interface | Inline | Inline | Yes |
Generics | Shapes | Yes | No |
func
with actual types
type foo struct {
	f *int
}

// setDefault points f at a freshly created 1; the local escapes
// because its address is stored in the receiver.
func (b *foo) setDefault() {
	one := 1 // moved to heap: one
	b.f = &one
}

// setF stores the caller-supplied pointer as-is.
func (b *foo) setF(f *int) {
	b.f = f
}
Both values are moved to heap.
// main: both the setDefault-internal local and the caller's f end up
// on the heap, because a pointer to each is retained past the call.
func main() {
	var b1 foo
	b1.setDefault()

	var b2 foo
	var f int = 2 // moved to heap: f
	b2.setF(&f)
}
// main pre-allocates the pointee outside the hot path, so the update
// on the critical path is a plain store with no allocation.
func main() {
	var target foo
	target.f = new(int) // allocate before the critical path
	target.setVal(2)    // critical path: store only
}

type foo struct {
	f *int
}

// setVal writes v into the pre-allocated slot.
func (b *foo) setVal(v int) {
	*b.f = v
}
If you prefer pointer parameters.
type foo struct {
	f *int
}

// setValPtr copies the pointed-to value into the slot b.f refers to.
// Only the pointee is read, so the argument pointer itself does not
// have to escape.
func (b *foo) setValPtr(v *int) {
	val := *v
	*b.f = val
}
type foo struct {
	x int
}

// newFoo returns a pointer to a fresh foo. The composite literal
// escapes to the heap because its address outlives the function.
func newFoo(x int) *foo {
	result := &foo{x: x} // escapes to heap
	return result
}
type foo struct {
	x int
}

// set writes x and returns the receiver, enabling chaining on a value
// that escape analysis can keep off the heap.
func (f *foo) set(x int) *foo {
	f.x = x
	return f
}

func main() {
	// FIX: the original `f := new(foo).set(42)` left f unused, which is
	// a compile error in Go; discard the result explicitly instead.
	_ = new(foo).set(42) // no allocation
}
Types in math/big
are a good example of a design that avoids redundant allocations.
No allocations:
import "math/big"
// main sums 1+2+3 into a stack-resident big.Int. The Set/Add style of
// API lets the caller decide where results live, avoiding per-op
// allocations.
func main() {
	v1 := new(big.Int).SetInt64(1)
	v2 := new(big.Int).SetInt64(2)
	v3 := new(big.Int).SetInt64(3)

	var total big.Int
	total.Add(&total, v1)
	total.Add(&total, v2)
	total.Add(&total, v3)
	println(total.String())
}
// SmallInt mimics the math/big API shape for a single int32: methods
// write into the receiver and return it for chaining.
type SmallInt [1]int32

// Set stores x into the receiver and returns it.
func (s *SmallInt) Set(x int32) *SmallInt {
	s[0] = x
	return s
}

// Add stores x+y into the receiver and returns it.
func (s *SmallInt) Add(x, y *SmallInt) *SmallInt {
	s[0] = x[0] + y[0]
	return s
}
type Child int
// Parent retains a pointer to a Child, so a Child whose address is
// passed to SetChild must (normally) be heap-allocated — see the
// runtime.newobject call in the listing below.
type Parent struct {
C *Child
}
// SetChild publishes c through the parent; escape analysis must assume
// c outlives the caller's stack frame.
func (p *Parent) SetChild(c *Child) {
p.C = c
}
; c := Child(1)
LEAQ 0x4ef9(IP), AX
CALL runtime.newobject(SB)
MOVQ $0x1, 0(AX)
; p.SetChild(&c)
MOVQ AX, BX
LEAQ 0x20(SP), AX
NOPL 0(AX)(AX*1)
CALL main.(*Parent).SetChild(SB)
// SetChildUnsafe hides c from escape analysis via noescape, letting
// the Child stay on the caller's stack. DANGEROUS: the stored pointer
// must never be used after that stack frame is gone (see the warning
// and the dangerousOperation example below).
func (p *Parent) SetChildUnsafe(c *Child) {
p.C = (*Child)(noescape(unsafe.Pointer(c)))
}
// noescape hides p from escape analysis: the round-trip through
// uintptr with the no-op `x ^ 0` defeats the compiler's pointer
// tracking while returning p unchanged (the same trick the Go runtime
// uses internally).
//go:nosplit
//go:nocheckptr
func noescape(p unsafe.Pointer) unsafe.Pointer {
x := uintptr(p)
return unsafe.Pointer(x ^ 0)
}
; c := Child(2)
MOVQ $0x2, 0x10(SP)
; p.SetChildUnsafe(&c)
LEAQ 0x18(SP), AX
LEAQ 0x10(SP), BX
CALL main.(*Parent).SetChildUnsafe(SB)
It could be dangerous — use only if the child object is not accessible outside of the parent’s stack frame.
// dangerousOperation shows the only safe shape for SetChildUnsafe:
// c lives in this frame, and the deferred SetChild(nil) clears the
// analysis-invisible pointer before the frame dies.
func dangerousOperation(p *Parent) {
defer p.SetChild(nil)
var c Child
p.SetChildUnsafe(&c)
// work with parent and child
}