golang调度学习-调度流程 (四) mstart

不管是程序启动时的rt0_go，还是go语句（在存在空闲P、需要启动新的M的情况下），最终都会调用mstart。

mstart

mstart在m的g0（调度栈）上执行，m的执行有两个起点：（存疑）

从m的启动函数（创建m的时候绑定的）mstart()开始，触发m的调度
调度过程中调用stopm()睡眠后，通过 notewakeup(&mp.park)恢复m的执行，并从stopm()的位置开始执行，重新调度。

// mstart is the entry-point for new Ms.
//
// This must not split the stack because we may not even have stack
// bounds set up yet.
//
// May run during STW (because it doesn't have a P yet), so write
// barriers are not allowed.
//
// It fills in the stack bounds and stack guards of the current g,
// runs mstart1, and finally calls mexit to exit the thread.
//
//go:nosplit
//go:nowritebarrierrec
func mstart() {
    _g_ := getg()

    // A zero stack.lo means no stack bounds have been recorded for this
    // g yet, so it is treated as running on an OS-provided stack
    // (article note: this is the case for m0's g0).
    osStack := _g_.stack.lo == 0
    if osStack {
        // Initialize stack bounds from system stack.
        // Cgo may have left stack size in stack.hi.
        // minit may update the stack bounds.
        //
        // Note: these bounds may not be very accurate.
        // We set hi to &size, but there are things above
        // it. The 1024 is supposed to compensate this,
        // but is somewhat arbitrary.
        size := _g_.stack.hi
        if size == 0 {
            // No size was left in stack.hi; fall back to a default.
            size = 8192 * sys.StackGuardMultiplier
        }
        // Use the address of a local as an approximation of the top of
        // the stack, then derive the lower bound from it.
        _g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
        _g_.stack.lo = _g_.stack.hi - size + 1024
    }
    // Initialize stack guard so that we can start calling regular
    // Go code.
    _g_.stackguard0 = _g_.stack.lo + _StackGuard
    // This is the g0, so we can also call go:systemstack
    // functions, which check stackguard1.
    _g_.stackguard1 = _g_.stackguard0
    mstart1()

    // Exit this thread.
    if mStackIsSystemAllocated() {
        // Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate
        // the stack, but put it in _g_.stack before mstart,
        // so the logic above hasn't set osStack yet.
        osStack = true
    }
    mexit(osStack)
}


// mstart1 finishes the start-up of the current M and enters the
// scheduler. It must run on the M's g0 (it throws otherwise). It records
// the caller's pc/sp, performs per-thread initialization, runs the M's
// optional start function, binds the M to its nextp (for every M except
// m0) and then calls schedule.
func mstart1() {
    _g_ := getg()

    if _g_ != _g_.m.g0 {
        throw("bad runtime·mstart")
    }

    // Record the caller for use as the top of stack in mcall and
    // for terminating the thread.
    // We're never coming back to mstart1 after we call schedule,
    // so other calls can reuse the current frame.
    // Article note: save stores the given pc and sp into the current
    // g's sched field (save is defined elsewhere).
    save(getcallerpc(), getcallersp())
    asminit()
    // Article note: minit initializes signal handling state for this
    // thread and obtains the thread id.
    minit()

    // Install signal handlers; after minit so that minit can
    // prepare the thread to be able to handle the signals.
    if _g_.m == &m0 {
        // Article note: signal-related initialization, m0 only.
        mstartm0()
    }
    // If the M was given a start function, run it first
    // (article note: sysmon is one such function).
    if fn := _g_.m.mstartfn; fn != nil {
        fn()
    }

    // For every M other than m0, bind the P parked in nextp to this M
    // and clear nextp.
    if _g_.m != &m0 {
        acquirep(_g_.m.nextp.ptr())
        _g_.m.nextp = 0
    }
    schedule()
}

绑定好p之后，m拥有了可分配的cache和执行队列，进入核心调度循环。核心调度从schedule函数开始，每调度完一次之后会被引导重新执行schedule，从而实现循环调度。

schedule

// One round of scheduler: find a runnable goroutine and execute it.
// Never returns.
//
// Candidates are tried in this order: the trace reader, a GC mark
// worker, the global run queue (only on every 61st scheduler tick), the
// local run queue of the current P, and finally findrunnable, which
// blocks until work is available.
func schedule() {
    _g_ := getg()

    if _g_.m.locks != 0 {
        throw("schedule: holding locks")
    }
    // If this M is locked to a G, stop the M until that G is runnable
    // again, then run it directly.
    // Article note: stoplockedm gives up the P and sleeps until another
    // M schedules lockedg and wakes this M.
    if _g_.m.lockedg != 0 {
        stoplockedm()
        execute(_g_.m.lockedg.ptr(), false) // Never returns.
    }

    // We should not schedule away from a g that is executing a cgo call,
    // since the cgo call is using the m's g0 stack.
    if _g_.m.incgo {
        throw("schedule: in cgo")
    }

top:
    pp := _g_.m.p.ptr()
    pp.preempt = false
    // If the GC is waiting to stop the world (STW), park this M via
    // gcstopm and start over once it returns.
    if sched.gcwaiting != 0 {
        gcstopm()
        goto top
    }
    if pp.runSafePointFn != 0 {
        runSafePointFn()
    }

    // Sanity check: if we are spinning, the run queue should be empty.
    // Check this before calling checkTimers, as that might call
    // goready to put a ready goroutine on the local run queue.
    if _g_.m.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
        throw("schedule: spinning with local work")
    }

    checkTimers(pp, 0)

    var gp *g
    var inheritTime bool

    // Normal goroutines will check for need to wakeP in ready,
    // but GCworkers and tracereaders will not, so the check must
    // be done here instead.
    tryWakeP := false
    if trace.enabled || trace.shutdown {
        gp = traceReader()
        if gp != nil {
            casgstatus(gp, _Gwaiting, _Grunnable)
            traceGoUnpark(gp, 0)
            tryWakeP = true
        }
    }
    // While GC marking is enabled, look for a GC worker that is ready to
    // run; a GC worker is itself a G.
    if gp == nil && gcBlackenEnabled != 0 {
        gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
        tryWakeP = tryWakeP || gp != nil
    }
    if gp == nil {
        // Check the global runnable queue once in a while to ensure fairness.
        // Otherwise two goroutines can completely occupy the local runqueue
        // by constantly respawning each other.
        // This happens whenever schedtick is a multiple of 61.
        // Why 61? See https://github.com/golang/go/issues/20168
        if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 {
            lock(&sched.lock)
            gp = globrunqget(_g_.m.p.ptr(), 1)
            unlock(&sched.lock)
        }
    }
    if gp == nil {
        // Take a G from the local run queue of the current P.
        gp, inheritTime = runqget(_g_.m.p.ptr())
        // We can see gp != nil here even if the M is spinning,
        // if checkTimers added a local goroutine via goready.
    }
    if gp == nil {
        // Last resort: findrunnable tries every remaining source and
        // does not return until it has found a runnable G.
        gp, inheritTime = findrunnable() // blocks until work is available
    }

    // This thread is going to run a goroutine and is not spinning anymore,
    // so if it was marked as spinning we need to reset it now and potentially
    // start a new spinning M.
    if _g_.m.spinning {
        resetspinning()
    }

    if sched.disable.user && !schedEnabled(gp) {
        // Scheduling of this goroutine is disabled. Put it on
        // the list of pending runnable goroutines for when we
        // re-enable user scheduling and look again.
        lock(&sched.lock)
        if schedEnabled(gp) {
            // Something re-enabled scheduling while we
            // were acquiring the lock.
            unlock(&sched.lock)
        } else {
            sched.disable.runnable.pushBack(gp)
            sched.disable.n++
            unlock(&sched.lock)
            goto top
        }
    }

    // If about to schedule a not-normal goroutine (a GCworker or tracereader),
    // wake a P if there is one.
    if tryWakeP {
        wakep()
    }
    if gp.lockedm != 0 {
        // The G that was found is locked to another M.
        // Article note: dolockOSThread and cgo are what lock a G to an
        // M; startlockedm passes the P and the G to that M, wakes it,
        // and this M goes back to waiting.
        // Hands off own p to the locked m,
        // then blocks waiting for a new p.
        startlockedm(gp)
        goto top
    }

    execute(gp, inheritTime)
}

schedule方法的主要功能是：尽可能给m找到可以运行的g，主要分为以下几种情况：

当前m已经指定了g。该情况下会将m与p解绑，然后m睡眠，等待被绑定的g被调度然后唤醒该m执行该g
gc触发STW的时候，m直接睡眠
gcmark（标记）阶段，大约有1/4的P（CPU时间）用于后台并发标记，这里会检测是否需要调度执行gc标记的g（gcBlackenEnabled!=0）
每调度61次（schedtick%61 == 0），会优先尝试从全局的g队列中获取g，以保证公平性
全局队列中未获取到便去绑定p的本地任务队列获取g
还未获取便调用findrunnable()去尽可能获取，取不到便会睡眠，不返回。
获取到的g有绑定的m，交出当前的p和g，与指定的m绑定，唤醒指定的m，自己睡眠，等待唤醒。
执行获取到的g

execute

// execute schedules gp to run on the current M: it links gp and the M
// to each other, moves gp from _Grunnable to _Grunning, resets its
// wait/preempt state and stack guard, and switches to it via gogo.
// If inheritTime is false, the schedtick of the current P is incremented.
func execute(gp *g, inheritTime bool) {
    _g_ := getg()

    // Assign gp.m before entering _Grunning so running Gs have an
    // M.
    // Make gp the current G of this M ...
    _g_.m.curg = gp
    // ... and make this M the M of gp.
    gp.m = _g_.m
    // Move gp to the _Grunning state.
    casgstatus(gp, _Grunnable, _Grunning)
    // Reset the wait timestamp.
    gp.waitsince = 0
    // Clear the preemption flag.
    gp.preempt = false
    gp.stackguard0 = gp.stack.lo + _StackGuard
    // Count a scheduler tick unless gp inherits the current time slice.
    if !inheritTime {
        _g_.m.p.ptr().schedtick++
    }

    // Check whether the profiler needs to be turned on or off.
    hz := sched.profilehz
    if _g_.m.profilehz != hz {
        setThreadCPUProfiler(hz)
    }

    if trace.enabled {
        // GoSysExit has to happen when we have a P, but before GoStart.
        // So we emit it here.
        if gp.syscallsp != 0 && gp.sysblocktraced {
            traceGoSysExit(gp.sysexitticks)
        }
        traceGoStart()
    }
    // Article note: gogo is implemented in assembly
    // (runtime/asm_amd64.s). It switches from the current G to gp and
    // jumps (JMP) to gp's task function; when that function finishes,
    // goexit is called.
    gogo(&gp.sched)
}

gcstopm

// Stops the current m for stopTheWorld.
// Returns when the world is restarted.
//
// It throws if no GC stop is pending (sched.gcwaiting == 0).
func gcstopm() {
    _g_ := getg()

    if sched.gcwaiting == 0 {
        throw("gcstopm: not waiting for gc")
    }
    // Leave the spinning state before stopping.
    if _g_.m.spinning {
        _g_.m.spinning = false
        // OK to just drop nmspinning here,
        // startTheWorld will unpark threads as necessary.
        if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
            throw("gcstopm: negative nmspinning")
        }
    }
    // Detach the P from the current M.
    _p_ := releasep()
    lock(&sched.lock)
    _p_.status = _Pgcstop
    // One fewer P to wait for; when the count reaches zero, wake
    // whoever is waiting on stopnote.
    sched.stopwait--
    if sched.stopwait == 0 {
        notewakeup(&sched.stopnote)
    }
    unlock(&sched.lock)
    // Park this M; per the header comment, control comes back here once
    // the world is restarted.
    stopm()
}

findrunnable

该方法会想尽一切办法找到可以执行的任务，核心调度函数
这里逻辑较为复杂，下面将以代码中的两个标签top和stop将流程分开：
top label：

stop label：


// findrunnable finds a runnable goroutine to execute. If none can be
// found it parks the M and retries after being woken, so it only ever
// returns with a non-nil gp.
//
// Sources are tried in this order: the local run queue, the global run
// queue, a non-blocking netpoll, and stealing from other Ps. If all of
// these fail (label stop) it tries an idle-mode GC mark worker, then
// releases the P, re-checks every P's run queue, GC work and a blocking
// netpoll, and finally calls stopm.
//
// inheritTime is true only when the G came from the local run queue and
// runqget reported that it should inherit the current time slice.
func findrunnable() (gp *g, inheritTime bool) {
    _g_ := getg()
    // The conditions here and in handoffp must agree: if findrunnable
    // would return a G to run, handoffp must start an M.
top:
    _p_ := _g_.m.p.ptr()
    // If the GC is waiting to stop the world, park this M and retry
    // after it has been restarted.
    if sched.gcwaiting != 0 {
        gcstopm()
        goto top
    }
    if _p_.runSafePointFn != 0 {
        runSafePointFn()
    }
    // fing is the goroutine that runs finalizers; wake it if requested.
    if fingwait && fingwake {
        if gp := wakefing(); gp != nil {
            ready(gp, 0, true)
        }
    }
    if *cgo_yield != nil {
        asmcgocall(*cgo_yield, nil)
    }
    // local runq
    if gp, inheritTime := runqget(_p_); gp != nil {
        return gp, inheritTime
    }
    // global runq
    if sched.runqsize != 0 {
        lock(&sched.lock)
        gp := globrunqget(_p_, 0)
        unlock(&sched.lock)
        if gp != nil {
            return gp, false
        }
    }
    // Poll network (non-blocking): look for Gs whose network I/O is
    // ready and make them runnable.
    if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
        if gp := netpoll(false); gp != nil { // non-blocking
            // netpoll returns list of goroutines linked by schedlink.
            // Keep the head for ourselves and inject the rest of the
            // list so that other Ms can pick those Gs up.
            injectglist(gp.schedlink.ptr())
            casgstatus(gp, _Gwaiting, _Grunnable)
            if trace.enabled {
                traceGoUnpark(gp, 0)
            }
            return gp, false
        }
    }
    // Steal work from other P's.
    procs := uint32(gomaxprocs)
    // If every other P is idle there is nothing to steal.
    if atomic.Load(&sched.npidle) == procs-1 {
        goto stop
    }
    // If this M is not spinning and the number of spinning Ms is already
    // at least half the number of busy Ps, block instead of spinning.
    // This prevents excessive CPU consumption when GOMAXPROCS>>1 but the
    // program parallelism is low.
    if !_g_.m.spinning && 2*atomic.Load(&sched.nmspinning) >= procs-atomic.Load(&sched.npidle) {
        goto stop
    }
    // Enter the spinning state if not already in it.
    if !_g_.m.spinning {
        _g_.m.spinning = true
        atomic.Xadd(&sched.nmspinning, 1)
    }
    // Make four passes over the Ps in a random order, trying to steal
    // from each one.
    for i := 0; i < 4; i++ {
        for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
            if sched.gcwaiting != 0 {
                goto top
            }
            stealRunNextG := i > 2 // first look for ready queues with more than 1 g
            // Article note: runqsteal takes half of the victim's Gs and
            // returns one of them.
            if gp := runqsteal(_p_, allp[enum.position()], stealRunNextG); gp != nil {
                return gp, false
            }
        }
    }
stop:
    // We have nothing to do. If we're in the GC mark phase, can safely
    // scan and blacken objects, and this P has a mark worker with work
    // available, run that worker in idle mode instead of giving up the P.
    if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) {
        _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
        gp := _p_.gcBgMarkWorker.ptr()
        casgstatus(gp, _Gwaiting, _Grunnable)
        if trace.enabled {
            traceGoUnpark(gp, 0)
        }
        return gp, false
    }
    // Snapshot allp before dropping the P; it is re-scanned below.
    allpSnapshot := allp
    // return P and block
    lock(&sched.lock)
    if sched.gcwaiting != 0 || _p_.runSafePointFn != 0 {
        unlock(&sched.lock)
        goto top
    }
    // Check the global run queue once more, this time under the lock.
    if sched.runqsize != 0 {
        gp := globrunqget(_p_, 0)
        unlock(&sched.lock)
        return gp, false
    }
    // Detach the P from this M ...
    if releasep() != _p_ {
        throw("findrunnable: wrong p")
    }
    // ... and put it on the idle P list.
    pidleput(_p_)
    unlock(&sched.lock)
    wasSpinning := _g_.m.spinning
    // Leave the spinning state.
    if _g_.m.spinning {
        _g_.m.spinning = false
        if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 {
            throw("findrunnable: negative nmspinning")
        }
    }
    // check all runqueues once again
    for _, _p_ := range allpSnapshot {
        if !runqempty(_p_) {
            // Some P has work: try to grab an idle P to run it on.
            lock(&sched.lock)
            _p_ = pidleget()
            unlock(&sched.lock)
            if _p_ != nil {
                acquirep(_p_)
                // Restore the spinning state we had before.
                if wasSpinning {
                    _g_.m.spinning = true
                    atomic.Xadd(&sched.nmspinning, 1)
                }
                // Start over from the top with the newly acquired P.
                goto top
            }
            break
        }
    }
    // Check for idle-priority GC work again. Only a P that owns a mark
    // worker is useful here; any other idle P is put back.
    if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) {
        lock(&sched.lock)
        _p_ = pidleget()
        if _p_ != nil && _p_.gcBgMarkWorker == 0 {
            pidleput(_p_)
            _p_ = nil
        }
        unlock(&sched.lock)
        if _p_ != nil {
            acquirep(_p_)
            if wasSpinning {
                _g_.m.spinning = true
                atomic.Xadd(&sched.nmspinning, 1)
            }
            // Go back to idle GC check.
            goto stop
        }
    }
    // poll network (blocking). The Xchg64 claims the poll by zeroing
    // lastpoll.
    // NOTE(review): nothing in this function restores sched.lastpoll to
    // a non-zero value after the blocking poll returns, which would keep
    // both netpoll branches disabled — confirm it is restored elsewhere.
    if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
        gp := netpoll(true) // block until new work is available
        if gp != nil {
            lock(&sched.lock)
            _p_ = pidleget()
            unlock(&sched.lock)
            // This M released its P above and other Ms may have taken
            // every idle P in the meantime, so pidleget can return nil.
            if _p_ != nil {
                acquirep(_p_)
                injectglist(gp.schedlink.ptr())
                casgstatus(gp, _Gwaiting, _Grunnable)
                if trace.enabled {
                    traceGoUnpark(gp, 0)
                }
                return gp, false
            }
            // No P available: hand the whole list over to the scheduler
            // and fall through to stopm.
            injectglist(gp)
        }
    }
    // Nothing to run anywhere: park this M. It is not spinning at this
    // point. Retry from the top once woken.
    stopm()
    goto top
}

以下给出一个工作线程的执行流程简图

引用

[1]. Golang源码学习：调度逻辑（三）工作线程的执行流程与调度循环 https://bbs.huaweicloud.com/b...

golang调度学习-调度流程 (四) mstart

mstart

schedule

execute

gcstopm

findrunnable

引用

xxx小M

引用和评论

Go Modules

Go 语言-计算密集型服务性能优化

IO 密集型服务耗时优化

Go 语言 JSON 与 Cache 库调研与选型

告别ELK，APO提供基于ClickHouse开箱即用的高效日志方案——APO 0.6.0发布

Go 语言-内存泄漏排查两例

Golang GC 从原理到优化

golang调度学习-调度流程 (四) mstart

mstart

schedule

execute

gcstopm

findrunnable

引用

xxx小M

引用和评论

Go Modules

Go 语言-计算密集型服务 性能优化

IO 密集型服务 耗时优化

Go 语言 JSON 与 Cache 库 调研与选型

告别ELK，APO提供基于ClickHouse开箱即用的高效日志方案——APO 0.6.0发布

Go 语言-内存泄漏排查两例

Golang GC 从原理到优化

Go 语言-计算密集型服务性能优化

IO 密集型服务耗时优化

Go 语言 JSON 与 Cache 库调研与选型