前序
上文讲到rt0_go的runtime·schedinit(SB)
// Excerpt of rt0_go: after the earlier setup (elided below) it queues the
// main goroutine through newproc and then starts the current M.
TEXT runtime·rt0_go<ABIInternal>(SB),NOSPLIT,$0
// (earlier part omitted; see part 1 of this series, "golang调度学习-调度流程 (一)")
// create a new goroutine to start program
MOVQ $runtime·mainPC(SB), AX // entry: runtime·mainPC, i.e. runtime·main
PUSHQ AX // second argument of newproc (fn)
PUSHQ $0 // first argument of newproc (arg size, 0 here)
CALL runtime·newproc(SB) // equivalent to runtime·newproc($0, $runtime·mainPC(SB))
POPQ AX // the two POPQs discard the two arguments pushed above
POPQ AX
// start this M
CALL runtime·mstart(SB)
CALL runtime·abort(SB) // mstart should never return
RET
// Prevent dead-code elimination of debugCallV1, which is
// intended to be called by debuggers.
MOVQ $runtime·debugCallV1<ABIInternal>(SB), AX
RET
goroutine
新建golang文件main.go
package main
// testGoFun is a minimal sample whose body is a single go statement (it
// spawns testGoFun again), so the compiler listing shows how a go statement
// is lowered.
func testGoFun() {
go testGoFun()
}
执行
go tool compile -N -l -S ./main.go > ./main.s
查看main.s文件
"".testGoFun STEXT size=71 args=0x0 locals=0x18 funcid=0x0
0x0000 00000 (./test_go.go:7) TEXT "".testGoFun(SB), ABIInternal, $24-0
0x0000 00000 (./test_go.go:7) MOVQ (TLS), CX
0x0009 00009 (./test_go.go:7) CMPQ SP, 16(CX)
0x000d 00013 (./test_go.go:7) PCDATA $0, $-2
0x000d 00013 (./test_go.go:7) JLS 64
0x000f 00015 (./test_go.go:7) PCDATA $0, $-1
0x000f 00015 (./test_go.go:7) SUBQ $24, SP
0x0013 00019 (./test_go.go:7) MOVQ BP, 16(SP)
0x0018 00024 (./test_go.go:7) LEAQ 16(SP), BP
0x001d 00029 (./test_go.go:7) FUNCDATA $0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x001d 00029 (./test_go.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x001d 00029 (./test_go.go:8) MOVL $0, (SP)
0x0024 00036 (./test_go.go:8) LEAQ "".testGoFun·f(SB), AX
0x002b 00043 (./test_go.go:8) MOVQ AX, 8(SP)
0x0030 00048 (./test_go.go:8) PCDATA $1, $0
0x0030 00048 (./test_go.go:8) CALL runtime.newproc(SB)
0x0035 00053 (./test_go.go:9) MOVQ 16(SP), BP
0x003a 00058 (./test_go.go:9) ADDQ $24, SP
0x003e 00062 (./test_go.go:9) RET
0x003f 00063 (./test_go.go:9) NOP
0x003f 00063 (./test_go.go:7) PCDATA $1, $-1
0x003f 00063 (./test_go.go:7) PCDATA $0, $-2
0x003f 00063 (./test_go.go:7) NOP
0x0040 00064 (./test_go.go:7) CALL runtime.morestack_noctxt(SB)
0x0045 00069 (./test_go.go:7) PCDATA $0, $-1
0x0045 00069 (./test_go.go:7) JMP 0
0x0000 65 48 8b 0c 25 00 00 00 00 48 3b 61 10 76 31 48 eH..%....H;a.v1H
0x0010 83 ec 18 48 89 6c 24 10 48 8d 6c 24 10 c7 04 24 ...H.l$.H.l$...$
0x0020 00 00 00 00 48 8d 05 00 00 00 00 48 89 44 24 08 ....H......H.D$.
0x0030 e8 00 00 00 00 48 8b 6c 24 10 48 83 c4 18 c3 90 .....H.l$.H.....
0x0040 e8 00 00 00 00 eb b9 .......
rel 5+4 t=17 TLS+0
rel 39+4 t=16 "".testGoFun·f+0
rel 49+4 t=8 runtime.newproc+0
rel 65+4 t=8 runtime.morestack_noctxt+0
可以看到, go 语句同样被编译成 CALL runtime.newproc(SB),
由它来创建新协程
newproc
创建一个新的g运行带siz字节参数的fn, 并且把它放到g.m.p的待运行队列
在编写程序中,使用 go func() {}来创建一个goroutine(g),这条语句会被编译器翻译成函数 newproc()。
// Create a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
//
// The stack layout of this call is unusual: it assumes that the
// arguments to pass to fn are on the stack sequentially immediately
// after &fn. Hence, they are logically part of newproc's argument
// frame, even though they don't appear in its signature (and can't
// because their types differ between call sites).
//
// This must be nosplit because this stack layout means there are
// untyped arguments in newproc's argument frame. Stack copies won't
// be able to adjust them and stack splits won't be able to copy them.
//
//go:nosplit
func newproc(siz int32, fn *funcval) {
argp := add(unsafe.Pointer(&fn), sys.PtrSize) // address just past &fn: where a=1 sits in the example below
gp := getg()
pc := getcallerpc() // PC of the caller (funCaller in the example below)
systemstack(func() { // run the closure on the system (g0) stack
newg := newproc1(fn, argp, siz, gp, pc) // build the new g; newproc1 is analyzed below
_p_ := getg().m.p.ptr()
// Queue newg on _p_, asking for the runnext slot (third argument true).
// Per the write-up referenced below: when the third argument is true the g
// goes into runnext and the previous runnext is moved to runq; otherwise
// the g is put on runq. When runq is full the g goes to the global
// sched.runq, which requires taking the lock.
// Reference: https://blog.csdn.net/diaosssss/article/details/93066804
runqput(_p_, newg, true)
if mainStarted { // false when called from rt0_go
wakep() // covered in part 3 of this series, "golang调度学习-调度流程 (三)"
}
})
}
假设调用关系如下(伪代码, funcA 的两个参数为 a=1, b=2):
func funCaller() {
go funcA(1, 2)
}
newproc的调用栈, caller SP表示funCaller的SP
位置 | 值 |
---|---|
(24)caller SP | b=2 |
(16)caller SP | a=1 |
(8)caller SP | *funcA |
(0)caller SP | siz=16, sizeof(a)+sizeof(b), 只统计fn的参数, 不包含 *funcA 指针 |
(-8)caller SP | caller PC |
newproc1
// Create a new g in state _Grunnable, starting at fn, with narg bytes
// of arguments starting at argp. callerpc is the address of the go
// statement that created this. The caller is responsible for adding
// the new g to the scheduler.
//
// This must run on the system stack because it's the continuation of
// newproc, which cannot split the stack.
//
//go:systemstack
func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerpc uintptr) *g {
_g_ := getg()
if fn == nil {
_g_.m.throwing = -1 // do not dump full stacks
throw("go of nil func value")
}
acquirem() // disable preemption because it can be holding p in a local var; paired with releasem(_g_.m) before the return
siz := narg
siz = (siz + 7) &^ 7 // round the argument size up to a multiple of 8
// We could allocate a larger initial stack if necessary.
// Not worth it: this is almost always an error.
// 4*sizeof(uintreg): extra space added below
// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
// The arguments must fit in the initial stack minus that reserved space.
if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
throw("newproc: function arguments too large for new goroutine")
}
_p_ := _g_.m.p.ptr()
newg := gfget(_p_) // try to reuse a cached g; gfget is covered below
if newg == nil {
newg = malg(_StackMin) // nothing cached: allocate a new g with a _StackMin stack (2 KB per the original annotation)
casgstatus(newg, _Gidle, _Gdead) // move the g from _Gidle to _Gdead
// Add the g to the all-g list (original annotation: so that the GC does not reclaim it).
allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
}
if newg.stack.hi == 0 {
throw("newproc1: newg missing stack")
}
if readgstatus(newg) != _Gdead {
throw("newproc1: new g is not Gdead")
}
totalSize := 4*sys.RegSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame
totalSize += -totalSize & (sys.SpAlign - 1) // align to spAlign
// Initial sp of the new goroutine: the top of its stack minus the space
// reserved for the arguments (and the extra space computed above).
sp := newg.stack.hi - totalSize
spArg := sp
if usesLR {
// caller's LR
*(*uintptr)(unsafe.Pointer(sp)) = 0
prepGoExitFrame(sp)
spArg += sys.MinFrameSize
}
if narg > 0 { // fn takes arguments
// Copy the arguments onto the new stack.
memmove(unsafe.Pointer(spArg), argp, uintptr(narg))
// This is a stack-to-stack copy. If write barriers
// are enabled and the source stack is grey (the
// destination is always black), then perform a
// barrier copy. We do this *after* the memmove
// because the destination stack may have garbage on
// it.
if writeBarrier.needed && !_g_.m.curg.gcscandone {
f := findfunc(fn.fn)
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
// (some GC-related work is elided here)
if stkmap.nbit > 0 {
// We're in the prologue, so it's always stack map index 0.
bv := stackmapdata(stkmap, 0)
bulkBarrierBitmap(spArg, spArg, uintptr(bv.n)*sys.PtrSize, 0, bv.bytedata)
}
}
}
// Initialize the g's gobuf (newg.sched): clear it, then record sp, pc and g.
memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
newg.sched.sp = sp
newg.stktopsp = sp // stack-top sp, used by traceback
// Store goexit's address in sched.pc; the gostartcallfn call below arranges for
// it to be the address fn returns to, so the goroutine runs goexit when fn finishes.
newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
// sched.g points back at the new g.
newg.sched.g = guintptr(unsafe.Pointer(newg))
// Push the current sched.pc onto the new stack and make fn the pc to run.
gostartcallfn(&newg.sched, fn)
newg.gopc = callerpc // caller's pc, used by traceback
newg.ancestors = saveAncestors(callergp) // ancestor goroutines, used by traceback
newg.startpc = fn.fn
if _g_.m.curg != nil {
newg.labels = _g_.m.curg.labels // profiler labels
}
// Is this a g that must be omitted from stack dumps and the deadlock detector?
// If so, count it in sched.ngsys.
if isSystemGoroutine(newg, false) {
atomic.Xadd(&sched.ngsys, +1)
}
casgstatus(newg, _Gdead, _Grunnable) // state transition: the g is now runnable
// Take a goid from [_p_.goidcache, _p_.goidcacheend). When the range is used up,
// refill it with a batch of _GoidCacheBatch ids from sched.goidgen
// (16 ids according to the original annotation).
if _p_.goidcache == _p_.goidcacheend {
// Sched.goidgen is the last allocated id,
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
// At startup sched.goidgen=0, so main goroutine receives goid=1.
_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
_p_.goidcache -= _GoidCacheBatch - 1
_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
}
newg.goid = int64(_p_.goidcache)
_p_.goidcache++
if raceenabled {
newg.racectx = racegostart(callerpc)
}
if trace.enabled {
traceGoCreate(newg, newg.startpc)
}
releasem(_g_.m)
return newg
}
newproc1的主要工作:
- acquirem (m.locks++), 禁止抢占
- 从缓存获取newg,缓存没有就新建一个
- 保证newg带有栈: gfget会给栈已被释放的g重新分配栈, malg新建的g自带2k的栈; 此时若newg仍没有栈, newproc1直接throw
- 构建newg的栈,先把调用参数拷贝到栈上,接着push goexit到栈上(假装是goexit调用了fn)
- 初始化newg的sched,traceback, goid等信息
- newg状态从_Gdead转变为_Grunnable
- releasem (m.locks--)
gfget
从缓存中获取g
// Get from gfree list.
// If local list is empty, grab a batch from global list.
// Returns nil when no free g is available; otherwise the returned g has a
// stack (a new _FixedStack one is allocated if its stack had been freed).
func gfget(_p_ *p) *g {
retry:
// If _p_.gFree is empty while sched.gFree.stack or sched.gFree.noStack is not,
// move up to 32 g's over from the global lists.
if _p_.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
lock(&sched.gFree.lock)
// Move a batch of free Gs to the P.
for _p_.gFree.n < 32 {
// Prefer Gs with stacks.
gp := sched.gFree.stack.pop()
if gp == nil {
gp = sched.gFree.noStack.pop()
if gp == nil {
break
}
}
sched.gFree.n--
_p_.gFree.push(gp)
_p_.gFree.n++
}
unlock(&sched.gFree.lock)
// Re-evaluate from the top now that the local list may have been refilled.
goto retry
}
gp := _p_.gFree.pop()
if gp == nil {
return nil
}
_p_.gFree.n--
if gp.stack.lo == 0 {
// Stack was deallocated in gfput. Allocate a new one.
systemstack(func() {
gp.stack = stackalloc(_FixedStack)
})
gp.stackguard0 = gp.stack.lo + _StackGuard
} else {
// The g kept its stack: report the reused range to the race detector / msan when enabled.
if raceenabled {
racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
}
if msanenabled {
msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
}
}
return gp
}
malg()
malg()函数创建一个新的g,包括为该g申请栈空间(支持程序分配栈的系统)。系统中的每个g都是由该函数创建而来的
// Allocate a new g, with a stack big enough for stacksize bytes.
// A negative stacksize skips the stack setup entirely and returns the bare g.
func malg(stacksize int32) *g {
newg := new(g)
if stacksize >= 0 {
// Add _StackSystem to the request and round with round2 before allocating.
stacksize = round2(_StackSystem + stacksize)
systemstack(func() {
newg.stack = stackalloc(uint32(stacksize))
})
// The stack guards are set only after the stack itself exists.
newg.stackguard0 = newg.stack.lo + _StackGuard
newg.stackguard1 = ^uintptr(0)
// Clear the bottom word of the stack. We record g
// there on gsignal stack during VDSO on ARM and ARM64.
*(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0
}
return newg
}
gfput
// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
// gp must be in state _Gdead; any other status is a fatal throw.
func gfput(_p_ *p, gp *g) {
if readgstatus(gp) != _Gdead {
throw("gfput: bad status (not Gdead)")
}
stksize := gp.stack.hi - gp.stack.lo
if stksize != _FixedStack {
// non-standard stack size - free it.
stackfree(gp.stack)
// Zero the stack fields so the g is recognizable as stackless
// (gp.stack.lo == 0 is what the transfer loop below tests).
gp.stack.lo = 0
gp.stack.hi = 0
gp.stackguard0 = 0
}
_p_.gFree.push(gp)
_p_.gFree.n++
// Once the local list holds 64 or more g's, move g's to the global lists
// until fewer than 32 remain locally.
if _p_.gFree.n >= 64 {
lock(&sched.gFree.lock)
for _p_.gFree.n >= 32 {
_p_.gFree.n--
gp = _p_.gFree.pop()
// Stackless g's and g's that still own a stack go on separate global lists.
if gp.stack.lo == 0 {
sched.gFree.noStack.push(gp)
} else {
sched.gFree.stack.push(gp)
}
sched.gFree.n++
}
unlock(&sched.gFree.lock)
}
}
流程如下图:
引用文章
[1] Go语言内幕(6):启动和内存分配初始化
https://studygolang.com/artic...
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。