转载自文章: https://studygolang.com/artic...

声明

下面的分析均基于Golang1.14版本。(我本地是1.16.2)
不同硬件平台使用的汇编文件不同。本文中mcall、asmcgocall的代码摘自asm_arm64.s;而systemstack和gogo的代码使用的是amd64指令(MOVQ、get_tls等),对应的是asm_amd64.s汇编文件。
不同操作系统平台使用的系统调用实现不同,本文分析的Syscall函数代码使用的是amd64指令(MOVQ、SYSCALL),对应的是asm_linux_amd64.s汇编文件。

概念

TLS

thread local storage

getg()

getg()用来获取当前线程正在执行的协程g。该协程g被存储在TLS中。

CPU的上下文

这些函数的本质都是为了切换goroutine,goroutine切换时需要切换CPU执行的上下文,主要有2个寄存器的值SP(当前线程使用的栈的栈顶地址),PC(下一个要执行的指令的地址)。

mcall函数

mcall在golang需要进行协程切换时被调用,用来保存被切换出去协程的信息,并在当前线程的g0协程堆栈上执行新的函数。一般情况下,会在新函数中执行一次schedule()来挑选新的协程来运行。
mcall函数的定义如下,mcall传入的是函数指针,传入函数的类型如下,只有一个参数goroutine的指针,无返回值。

func mcall(fn func(*g))

mcall函数的作用是在系统栈中执行调度代码,并且调度代码不会返回,将在运行过程中又一次执行mcall。mcall的流程是保存当前的g的上下文,切换到g0的上下文,传入函数参数,跳转到函数代码执行。

// void mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
//
// Outline: (1) record the caller's SP/FP/PC in g.sched, (2) make m.g0 the
// current g, (3) load RSP/R29 from g0.sched and call fn with the original g
// as its only argument.
TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
    // Save caller state in g->sched.
    // At this point the running context lives only in registers, so it is
    // written back into the current g before anything is switched.
    MOVD    RSP, R0                     // R0 = RSP (staged through R0 for the store below)
    MOVD    R0, (g_sched+gobuf_sp)(g)   // g.sched.sp = R0 (saved stack pointer)
    MOVD    R29, (g_sched+gobuf_bp)(g)  // g.sched.bp = R29 (saved frame pointer)
    MOVD    LR, (g_sched+gobuf_pc)(g)   // g.sched.pc = LR (link register: where mcall's caller resumes)
    MOVD    $0, (g_sched+gobuf_lr)(g)   // g.sched.lr = 0
    MOVD    g, (g_sched+gobuf_g)(g)     // g.sched.g = g

    // Switch to m->g0 & its stack, call fn.
    // Make g0 the current g.
    MOVD    g, R3                       // R3 = g (the goroutine that called mcall)
    MOVD    g_m(g), R8                  // R8 = g.m (the m this g is running on)
    MOVD    m_g0(R8), g                 // g = m.g0
    BL  runtime·save_g(SB)              // save_g is defined elsewhere; presumably publishes the new g for TLS/cgo users — confirm
    CMP g, R3                           // compare g0 with the original g: mcall must not be called on g0
    BNE 2(PC)                           // different: skip the next instruction and continue
    B   runtime·badmcall(SB)            // same: the caller was already g0, which is a bug
    
    // Load fn and set up g0's stack for the call.
    MOVD    fn+0(FP), R26               // R26 = fn (func value); NOTE(review): R26 is presumably the closure-context register — confirm
    MOVD    0(R26), R4                  // code pointer: R4 = first word of the func value
    MOVD    (g_sched+gobuf_sp)(g), R0   // R0 = g0.sched.sp
    MOVD    R0, RSP                     // RSP = R0: now running on g0's stack
    MOVD    (g_sched+gobuf_bp)(g), R29  // R29 = g0.sched.bp
    MOVD    R3, -8(RSP)                 // store the original g just below RSP; it becomes fn's argument
    MOVD    $0, -16(RSP)                // store 0 in the slot below it (the lr slot; irrelevant since fn never returns)
    SUB $16, RSP                        // RSP -= 16 to cover the two 8-byte slots written above
    BL  (R4)                            // call fn through its code pointer
    B   runtime·badmcall2(SB)           // fn must not return; reaching this line is a bug

执行 BL (R4)时, 寄存器情况

g = g0
R4 = pc
sp = g0.sched.gobuf.sp
bp = g0.sched.gobuf.bp

常见的调用mcall执行的函数有:
这些函数最终都是调用schedule,永远不会返回

mcall(gosched_m)
mcall(park_m)
mcall(goexit0)
mcall(exitsyscall0)
mcall(preemptPark)
mcall(gopreempt_m) 

systemstack函数

systemstack函数的定义如下,传入的函数无参数,无返回值。

func systemstack(fn func()) 

systemstack函数的作用是在系统栈中执行只能由g0(或gsignal?)执行的调度代码,和mcall不同的是,在执行完调度代码后会切回到现在正在执行的代码。
主要流程是先判断当前运行的g是否为g0或者gsignal,如果是则直接运行,不是则先切换到g0,执行完函数后切换为g返回调用处。

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
//
// The body is a bare RET with no frame and no arguments ($0-0); systemstack
// stores this routine's address as the saved PC in g.sched.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
    RET

// func systemstack(fn func())
//
// Runs fn on the g0 stack. If the current g is already m.gsignal or m.g0,
// fn is tail-called directly (noswitch). Otherwise the current g must be
// m.curg: its state is saved in g.sched, the stack is switched to g0, fn is
// called, and then execution switches back to m.curg and returns.
// Any other g is reported through runtime·badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
    MOVQ    fn+0(FP), DI                            // DI = fn (func value)
    get_tls(CX)                                     // macro defined elsewhere; presumably CX = TLS base used by g(CX) — confirm
    MOVQ    g(CX), AX                                // AX = current g
    MOVQ    g_m(AX), BX                                // BX = g.m

    CMPQ    AX, m_gsignal(BX)                       // g == m.gsignal?
    JEQ    noswitch

    MOVQ    m_g0(BX), DX                            // DX = m.g0
    CMPQ    AX, DX                                  // g == m.g0?
    JEQ    noswitch

    CMPQ    AX, m_curg(BX)                          // g must be m.curg at this point
    JNE    bad

    // switch stacks
    // save our state in g->sched. Pretend to
    // be systemstack_switch if the G stack is scanned.
    MOVQ    $runtime·systemstack_switch(SB), SI     // SI = address of systemstack_switch
    MOVQ    SI, (g_sched+gobuf_pc)(AX)                  // g.sched.pc = address of systemstack_switch
    MOVQ    SP, (g_sched+gobuf_sp)(AX)                  // g.sched.sp = SP
    MOVQ    AX, (g_sched+gobuf_g)(AX)                   // g.sched.g = g
    MOVQ    BP, (g_sched+gobuf_bp)(AX)                  // g.sched.bp = BP

    // switch to g0
    MOVQ    DX, g(CX)                                   // current g = g0
    MOVQ    (g_sched+gobuf_sp)(DX), BX                  // BX = g0.sched.sp
    // make it look like mstart called systemstack on g0, to stop traceback
    // This pushes mstart's address onto g0's stack by hand.
    SUBQ    $8, BX                                      // BX -= 8: reserve one slot
    MOVQ    $runtime·mstart(SB), DX                     // DX = address of mstart
    MOVQ    DX, 0(BX)                                   // store it in the reserved slot (new top of stack)
    MOVQ    BX, SP                                      // SP = BX: now running on g0's stack

    // call target function
    MOVQ    DI, DX                                      // DX = fn; NOTE(review): DX is presumably the closure-context register — confirm
    MOVQ    0(DI), DI                                   // DI = first word of the func value (code pointer)
    CALL    DI                                          // call fn

    // switch back to g
    get_tls(CX)                                         // reload the TLS base into CX (see note on the first get_tls)
    MOVQ    g(CX), AX                                   // AX = current g (g0 here)
    MOVQ    g_m(AX), BX                                 // BX = g.m
    MOVQ    m_curg(BX), AX                              // AX = m.curg
    MOVQ    AX, g(CX)                                   // current g = m.curg
    MOVQ    (g_sched+gobuf_sp)(AX), SP                  // SP = g.sched.sp: back on the goroutine stack
    MOVQ    $0, (g_sched+gobuf_sp)(AX)                  // g.sched.sp = 0: clear the saved SP
    RET

noswitch:
    // already on m stack; tail call the function
    // Using a tail call here cleans up tracebacks since we won't stop
    // at an intermediate systemstack.
    MOVQ    DI, DX                              // DX = fn (same setup as the switching path)
    MOVQ    0(DI), DI                           // DI = code pointer
    JMP    DI                                  // jump, not call: fn returns straight to systemstack's caller

bad:
    // Bad: g is not gsignal, not g0, not curg. What is it?
    MOVQ    $runtime·badsystemstack(SB), AX
    CALL    AX
    INT    $3                                  // breakpoint trap if badsystemstack ever returns

asmcgocall函数

asmcgocall函数定义如下,传入的参数有2个为函数指针和参数指针,返回参数为int32。

func asmcgocall(fn, arg unsafe.Pointer) int32 

asmcgocall函数的作用是执行cgo代码,该部分代码只能在g0(或gsignal, osthread)的栈执行,因此流程是先判断当前的栈是否要切换,如果无需切换则直接执行nosave然后返回,否则先保存当前g的上下文,然后切换到g0,执行完cgo代码后切回g,然后返回。

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
//
// Two paths: if there is no g, or g is already m.gsignal or m.g0, fn is called
// on the current stack (nosave). Otherwise the current g is saved, the stack
// is switched to m.g0, fn is called, and g plus the original SP are restored.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
    MOVD    fn+0(FP), R1  // R1 = fn
    MOVD    arg+8(FP), R0  // R0 = arg (left in R0 for the call to fn)

    MOVD    RSP, R2     // save original stack pointer
    CBZ g, nosave  // g == nil: no g to save or switch from, use the nosave path
    MOVD    g, R4  // R4 = g (old g, restored after the call)

    // Figure out if we need to switch to m->g0 stack.
    // We get called to create new OS threads too, and those
    // come in on the m->g0 stack already.
    MOVD    g_m(g), R8 // R8 = g.m
    MOVD    m_gsignal(R8), R3 // R3 = g.m.gsignal
    CMP R3, g  // g == m.gsignal?
    BEQ nosave
    MOVD    m_g0(R8), R3 // R3 = m.g0
    CMP R3, g  // g == m.g0?
    BEQ nosave

    // Switch to system stack.
    // Save the current g's context first.
    MOVD    R0, R9  // gosave<> and save_g might clobber R0
    BL  gosave<>(SB)  // defined elsewhere; presumably records the current state in g.sched — confirm
    MOVD    R3, g  // g = m.g0 (R3 was loaded above)
    BL  runtime·save_g(SB)  // defined elsewhere; presumably publishes the new g — confirm
    MOVD    (g_sched+gobuf_sp)(g), R0  // R0 = g0.sched.sp
    MOVD    R0, RSP  // RSP = g0.sched.sp
    MOVD    (g_sched+gobuf_bp)(g), R29  // R29 = g0.sched.bp
    MOVD    R9, R0  // restore arg into R0

    // Now on a scheduling stack (a pthread-created stack).
    // Reserve 16 bytes: two pointer-sized slots (old g and stack depth).
    MOVD    RSP, R13
    SUB $16, R13
    MOVD    R13, RSP  // RSP = RSP - 16
    MOVD    R4, 0(RSP)  // save old g on stack
    MOVD    (g_stack+stack_hi)(R4), R4 // R4 = oldg.stack.hi
    SUB R2, R4  // R4 = oldg.stack.hi - original SP (depth into the old stack)
    MOVD    R4, 8(RSP)  // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
    BL  (R1) // call fn; arg is in R0
    MOVD    R0, R9 // R9 = fn's result, kept safe across save_g below

    // Restore g, stack pointer. R0 is errno, so don't touch it
    MOVD    0(RSP), g  // g = old g saved at 0(RSP)
    BL  runtime·save_g(SB)
    MOVD    (g_stack+stack_hi)(g), R5 // R5 = g.stack.hi (re-read: the stack may have moved)
    MOVD    8(RSP), R6 // R6 = saved depth, loaded from 8(RSP)
    SUB R6, R5 // R5 = stack.hi - depth = original SP position
    MOVD    R9, R0 // R0 = fn's result
    MOVD    R5, RSP // RSP = original SP

    MOVW    R0, ret+16(FP) // ret = low 32 bits of R0
    RET

nosave:
    // Running on a system stack, perhaps even without a g.
    // Having no g can happen during thread creation or thread teardown
    // (see needm/dropm on Solaris, for example).
    // This code is like the above sequence but without saving/restoring g
    // and without worrying about the stack moving out from under us
    // (because we're on a system stack, not a goroutine stack).
    // The above code could be used directly if already on a system stack,
    // but then the only path through this code would be a rare case on Solaris.
    // Using this code for all "already on system stack" calls exercises it more,
    // which should help keep it correct.
    MOVD    RSP, R13 
    SUB $16, R13  
    MOVD    R13, RSP // RSP = RSP - 16
    MOVD    $0, R4 // R4 = 0
    MOVD    R4, 0(RSP)  // Where above code stores g, in case someone looks during debugging.
    MOVD    R2, 8(RSP)  // Save original stack pointer.
    BL  (R1)  // call fn; arg is in R0
    // Restore stack pointer.
    MOVD    8(RSP), R2  // R2 = original SP, loaded from 8(RSP)
    MOVD    R2, RSP // RSP = original SP
    // NOTE(review): this is a 64-bit MOVD into the int32 result slot, while the
    // switching path above uses MOVW — confirm whether the wider store is intended.
    MOVD    R0, ret+16(FP) // ret = R0
    RET 

syscall函数

Syscall函数的定义如下,传入4个参数,返回3个参数。

func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno)

Syscall函数的作用是传入系统调用号(trap)和参数,执行系统调用后返回结果。流程主要是:系统调用前执行entersyscall,设置g、p的状态;然后把参数装入寄存器并执行SYSCALL指令;执行后写返回值,最后执行exitsyscall设置g、p的状态。
entersyscall和exitsyscall在g的调用中细讲。

// func Syscall(trap int64, a1, a2, a3 uintptr) (r1, r2, err uintptr);
// Trap # in AX, args in DI SI DX R10 R8 R9, return in AX DX
// Note that this differs from "standard" ABI convention, which
// would pass 4th arg in CX, not R10.

// Four inputs: the trap number plus three arguments. Three results: r1, r2, err.
// The system call is bracketed by runtime·entersyscall / runtime·exitsyscall.
TEXT ·Syscall(SB),NOSPLIT,$0-56
    // Tell the runtime a system call is starting (entersyscall is defined elsewhere).
    CALL    runtime·entersyscall(SB)
    // Load the arguments into the registers named in the header comment.
    MOVQ    a1+8(FP), DI
    MOVQ    a2+16(FP), SI
    MOVQ    a3+24(FP), DX
    MOVQ    trap+0(FP), AX  // syscall entry
    SYSCALL
    // Unsigned compare: AX <= 0xfffffffffffff001 is treated as success.
    // NOTE(review): the usual Linux error range is -4095..-1; this test treats
    // -4095 (0xfffffffffffff001) itself as success — confirm that is intended.
    CMPQ    AX, $0xfffffffffffff001
    JLS ok
    // Failure: r1 = -1, r2 = 0, err = -AX.
    MOVQ    $-1, r1+32(FP)
    MOVQ    $0, r2+40(FP)
    NEGQ    AX
    MOVQ    AX, err+48(FP)
    // Tell the runtime the system call has finished.
    CALL    runtime·exitsyscall(SB)
    RET
ok:
    // Success: r1 = AX, r2 = DX, err = 0.
    MOVQ    AX, r1+32(FP)
    MOVQ    DX, r2+40(FP)
    MOVQ    $0, err+48(FP)
    CALL    runtime·exitsyscall(SB)
    RET 

除了Syscall还有Syscall6(除trap外还有6个参数),对应有6个参数的系统调用。实现大同小异,这里不分析。

gogo()

gogo的作用与mcall正好相反:mcall负责保存协程的执行状态,而gogo用来从gobuf中恢复出协程执行状态并跳转到上一次指令处继续执行。因此,其代码也相对比较容易理解,我们就不过多赘述,如下:

gogo()主要的调用路径:schedule()–>execute()–>gogo()

// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
//
// Makes buf.g the current g, reloads SP, AX (ret), DX (ctxt) and BP from buf,
// zeroes those fields in buf, and jumps to buf.pc. The routine ends in a JMP,
// not a RET.
TEXT runtime·gogo(SB), NOSPLIT, $16-8
    MOVQ buf+0(FP), BX                      // BX = buf
    MOVQ gobuf_g(BX), DX                    // DX = buf.g
    MOVQ 0(DX), CX                          // dereference g so that a nil g faults here (make sure g != nil)
    get_tls(CX)                             // macro defined elsewhere; presumably CX = TLS base used by g(CX) — confirm
    MOVQ DX, g(CX)                          // current g = buf.g
    MOVQ gobuf_sp(BX), SP                   // SP = buf.sp, restore SP
    MOVQ gobuf_ret(BX), AX                  // AX = buf.ret
    MOVQ gobuf_ctxt(BX), DX                 // DX = buf.ctxt
    MOVQ gobuf_bp(BX), BP                   // BP = buf.bp
    MOVQ $0, gobuf_sp(BX)                   // clear to help garbage collector
    MOVQ $0, gobuf_ret(BX)                  // buf.ret = 0
    MOVQ $0, gobuf_ctxt(BX)                 // buf.ctxt = 0
    MOVQ $0, gobuf_bp(BX)                   // buf.bp = 0
    MOVQ gobuf_pc(BX), BX                   // BX = buf.pc (BX no longer points at buf after this)
    JMP BX                                  // resume execution at the saved PC

总结与思考

1.汇编函数的作用。为什么golang一定要引入汇编函数呢?因为CPU执行时的上下文是寄存器,只有汇编语言才能操作寄存器。
2.CPU的上下文和g.sched(gobuf)结构体中的字段一一对应,只有10个以内的字段,因此切换上下文效率非常的高。
3.除了golang,其它在用的语言是否要有类似的汇编来实现语言和操作系统之间的交互?

最后

asmcgocall函数就不看直接用原文
syscall 有空再补上

参考资料

在整理的过程中,部分参考、引用下面链接地址内容。有一些写的还是不错的,感兴趣的同学可以阅读
[1] https://zhuanlan.zhihu.com/p/...


xxx小M
30 声望11 粉丝

暂时放一些读书笔记, 很多内容没有整理好