This article was written, compiled, and published by Kumo (Ma Zhi), chief lecturer of the HeapDump performance community.

Chapter 17-x86-64 Register

The machine language that a given family of CPUs can interpret is defined by its instruction set architecture (ISA), which is also simply called the instruction set. Intel refers to the 32-bit instruction set architecture of its x86 series of CPUs as IA-32; IA is the abbreviation of "Intel Architecture", and IA-32 is also known as i386 or x86-32. AMD proposed a 64-bit extension of the x86 series before Intel did, so the 64-bit x86 instruction set architecture designed by AMD is called AMD64. Later, Intel added almost the same instruction set to its own CPUs under the name Intel 64. AMD64 and Intel 64 are collectively referred to as x86-64.

All x86-64 general-purpose registers are as wide as the machine word length (data bus width), that is, 64 bits. x86-64 extends the 8 32-bit general-purpose registers of x86 (eax, ebx, ecx, edx, esi, edi, ebp, esp) to 64 bits and adds 8 new 64-bit registers (r8-r15). The naming has changed from "exx" to "rxx", but the "exx" names are still available for 32-bit operations. The following table lists the names of the registers.

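| Description | 32-bit | 64-bit |
| --- | --- | --- |
| General-purpose registers | eax | rax |
| | ecx | rcx |
| | edx | rdx |
| | ebx | rbx |
| | esp | rsp |
| | ebp | rbp |
| | esi | rsi |
| | edi | rdi |
| | - | r8~r15 |
| Floating-point registers | st0~st7 | st0~st7 |
| XMM registers | XMM0~XMM7 | XMM0~XMM15 |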

Among them, %esp and %ebp are used for special purposes to save pointers to specific locations in the program stack.

There is also the eflags register, which uses bits to express specific meanings, as shown in the figure below.

In HotSpot VM, the classes that represent registers are inherited from the AbstractRegisterImpl class. The definition of this class is as follows:

Source location: hotspot/src/share/vm/asm/register.hpp

// Forward declaration so the pointer alias below can be introduced before the class body.
class AbstractRegisterImpl;
// AbstractRegister is shorthand for a pointer to AbstractRegisterImpl.
typedef AbstractRegisterImpl* AbstractRegister;

// Common base class of the register classes shown in this chapter.
class AbstractRegisterImpl {
 protected:
  // Reinterprets the object's own address (the `this` pointer) as an integer
  // and returns it converted to int; no member data is read.
  int value() const  { return (int)(intx)this; }
}; 

The inheritance system of the AbstractRegisterImpl class is shown in the figure below.

In addition, there is a ConcreteRegisterImpl class that also inherits from AbstractRegisterImpl. This class is related to the implementation of the C2 compiler, so I won't explain it in detail here.

1. RegisterImpl class

The RegisterImpl class is used to represent general-purpose registers. The definition of the class is as follows:

Source location: hotspot/src/cpu/x86/vm/register_x86.hpp

// Register is used as shorthand for a pointer to RegisterImpl.
class RegisterImpl;
typedef RegisterImpl* Register;

// Represents a general-purpose register.
class RegisterImpl: public AbstractRegisterImpl {
 public:
  enum {
    // Total number of general-purpose registers.
    number_of_registers      = 16,
    // NOTE(review): presumably the number of registers usable for byte operands — confirm.
    number_of_byte_registers = 16
  };
  // ... (remaining members omitted in this excerpt)
};

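In 64-bit mode the general-purpose registers are 64 bits wide, and the low byte of each of the 16 registers can be used as an 8-bit register, so number_of_byte_registers is 16. (In 32-bit mode only eax, ebx, ecx, and edx have parts that can be used as 8-bit registers, so there the number of registers that can store bytes is 4.)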

Define the register in HotSpot VM as follows:

Source location: hotspot/src/cpu/x86/vm/register_x86.hpp

// Declares one constant per general-purpose register together with its enum
// value: -1 for the "no register" marker noreg, 0..15 for rax..r15.
// The trailing comment on each line shows the enum constant the macro produces.
CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); // noreg_RegisterEnumValue = ((-1))
CONSTANT_REGISTER_DECLARATION(Register, rax,    (0)); // rax_RegisterEnumValue = ((0))
CONSTANT_REGISTER_DECLARATION(Register, rcx,    (1)); // rcx_RegisterEnumValue = ((1))
CONSTANT_REGISTER_DECLARATION(Register, rdx,    (2)); // rdx_RegisterEnumValue = ((2))
CONSTANT_REGISTER_DECLARATION(Register, rbx,    (3)); // rbx_RegisterEnumValue = ((3))
CONSTANT_REGISTER_DECLARATION(Register, rsp,    (4)); // rsp_RegisterEnumValue = ((4))
CONSTANT_REGISTER_DECLARATION(Register, rbp,    (5)); // rbp_RegisterEnumValue = ((5))
CONSTANT_REGISTER_DECLARATION(Register, rsi,    (6)); // rsi_RegisterEnumValue = ((6))
CONSTANT_REGISTER_DECLARATION(Register, rdi,    (7)); // rdi_RegisterEnumValue = ((7))
CONSTANT_REGISTER_DECLARATION(Register, r8,     (8)); // r8_RegisterEnumValue = ((8))
CONSTANT_REGISTER_DECLARATION(Register, r9,     (9)); // r9_RegisterEnumValue = ((9))
CONSTANT_REGISTER_DECLARATION(Register, r10,   (10)); // r10_RegisterEnumValue = ((10))
CONSTANT_REGISTER_DECLARATION(Register, r11,   (11)); // r11_RegisterEnumValue = ((11))
CONSTANT_REGISTER_DECLARATION(Register, r12,   (12)); // r12_RegisterEnumValue = ((12))
CONSTANT_REGISTER_DECLARATION(Register, r13,   (13)); // r13_RegisterEnumValue = ((13))
CONSTANT_REGISTER_DECLARATION(Register, r14,   (14)); // r14_RegisterEnumValue = ((14))
CONSTANT_REGISTER_DECLARATION(Register, r15,   (15)); // r15_RegisterEnumValue = ((15))

The macro CONSTANT_REGISTER_DECLARATION is defined as follows:

Source location: hotspot/src/share/vm/asm/register.hpp

// Declares an extern constant `name` of type `type` and an anonymous enum whose
// single enumerator, <name>_<type>EnumValue, holds `value`.
// The expansion ends without a semicolon, so the invocation must supply it.
#define CONSTANT_REGISTER_DECLARATION(type, name, value)   \
  extern const type name;                                  \
  enum { name##_##type##EnumValue = (value) }

After macro expansion, it is as follows:

// Result of expanding CONSTANT_REGISTER_DECLARATION for every general-purpose
// register: an extern constant plus an anonymous enum carrying its numeric value.
// Each enum is terminated by the semicolon that follows the macro invocation.
extern const Register  noreg;
enum { noreg_RegisterEnumValue = ((-1)) };
extern const Register  rax;
enum { rax_RegisterEnumValue = ((0)) };
extern const Register  rcx;
enum { rcx_RegisterEnumValue = ((1)) };
extern const Register  rdx;
enum { rdx_RegisterEnumValue = ((2)) };
extern const Register  rbx;
enum { rbx_RegisterEnumValue = ((3)) };
extern const Register  rsp;
enum { rsp_RegisterEnumValue = ((4)) };
extern const Register  rbp;
enum { rbp_RegisterEnumValue = ((5)) };
extern const Register  rsi;
enum { rsi_RegisterEnumValue = ((6)) };
extern const Register  rdi;
enum { rdi_RegisterEnumValue = ((7)) };
extern const Register  r8;
enum { r8_RegisterEnumValue = ((8)) };
extern const Register  r9;
enum { r9_RegisterEnumValue = ((9)) };
extern const Register  r10;
enum { r10_RegisterEnumValue = ((10)) };
extern const Register  r11;
enum { r11_RegisterEnumValue = ((11)) };
extern const Register  r12;
enum { r12_RegisterEnumValue = ((12)) };
extern const Register  r13;
enum { r13_RegisterEnumValue = ((13)) };
extern const Register  r14;
enum { r14_RegisterEnumValue = ((14)) };
extern const Register  r15;
enum { r15_RegisterEnumValue = ((15)) };

The above enumeration class assigns a constant value to the register.

The registers defined in the cpu/x86/vm/register_definitions_x86.cpp file are as follows:

// Definitions of the general-purpose register constants. Each constant is the
// register's enum value cast to the Register pointer type.
const Register  noreg = ((Register)noreg_RegisterEnumValue);
const Register  rax =   ((Register)rax_RegisterEnumValue);
const Register  rcx =   ((Register)rcx_RegisterEnumValue);
const Register  rdx =   ((Register)rdx_RegisterEnumValue);
const Register  rbx =   ((Register)rbx_RegisterEnumValue);
const Register  rsp =   ((Register)rsp_RegisterEnumValue);
const Register  rbp =   ((Register)rbp_RegisterEnumValue);
const Register  rsi =   ((Register)rsi_RegisterEnumValue);
const Register  rdi =   ((Register)rdi_RegisterEnumValue);
const Register  r8 =  ((Register)r8_RegisterEnumValue);
const Register  r9 =  ((Register)r9_RegisterEnumValue);
const Register  r10 = ((Register)r10_RegisterEnumValue);
const Register  r11 = ((Register)r11_RegisterEnumValue);
const Register  r12 = ((Register)r12_RegisterEnumValue);
const Register  r13 = ((Register)r13_RegisterEnumValue);
const Register  r14 = ((Register)r14_RegisterEnumValue);
const Register  r15 = ((Register)r15_RegisterEnumValue);

When we need to use general-purpose registers, we can reference them through variables such as rax and rcx.

2. FloatRegisterImpl class

In HotSpot VM, FloatRegisterImpl is used to represent floating-point registers. The definition of this class is as follows:

Source location: hotspot/src/cpu/x86/vm/register_x86.hpp

// FloatRegister is used as shorthand for a pointer to FloatRegisterImpl.
class FloatRegisterImpl;
typedef FloatRegisterImpl* FloatRegister;

// Represents a floating-point register.
class FloatRegisterImpl: public AbstractRegisterImpl {
 public:
  enum {
    // Number of floating-point registers.
    number_of_registers = 8
  };
  // ... (remaining members omitted in this excerpt)
};

There are 8 floating-point registers, st0~st7, which are 8 80-bit registers.

It should be noted here that there is also a kind of register MMX. MMX is not a new register, but borrows the lower 64 bits of the 80-bit floating-point register. In other words, using the MMX instruction set will affect floating-point operations!

3. MMXRegisterImpl class

MMX is a SIMD technology, which can perform operations on multiple data items with a single instruction. There are 8 64-bit MMX registers, mm0-mm7 (borrowing the lower 64 bits of the 80-bit floating-point registers). Compared with ordinary 64-bit registers, the difference is that MMX instructions can operate on two 32-bit values or four 16-bit values at the same time, which is useful, for example, for color calculations in image processing.

The definition of the MMXRegisterImpl class is as follows:

// Only a forward declaration of MMXRegisterImpl is shown here.
class MMXRegisterImpl;
// MMXRegister is shorthand for a pointer to MMXRegisterImpl.
typedef MMXRegisterImpl* MMXRegister;

The definition of MMX register is as follows:

// Declares one constant per MMX register together with its enum value:
// -1 for the "no register" marker mnoreg, 0..7 for mmx0..mmx7.
CONSTANT_REGISTER_DECLARATION(MMXRegister, mnoreg , (-1));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx0 , ( 0));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx1 , ( 1));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx2 , ( 2));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx3 , ( 3));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx4 , ( 4));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx5 , ( 5));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx6 , ( 6));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx7 , ( 7));

The macro expansion is as follows:

// Result of expanding CONSTANT_REGISTER_DECLARATION for every MMX register:
// an extern constant plus an anonymous enum carrying its numeric value.
// Each enum is terminated by the semicolon that follows the macro invocation.
extern const MMXRegister  mnoreg;
enum { mnoreg_MMXRegisterEnumValue = ((-1)) };
extern const MMXRegister  mmx0;
enum { mmx0_MMXRegisterEnumValue = (( 0)) };
extern const MMXRegister  mmx1;
enum { mmx1_MMXRegisterEnumValue = (( 1)) };
extern const MMXRegister  mmx2;
enum { mmx2_MMXRegisterEnumValue = (( 2)) };
extern const MMXRegister  mmx3;
enum { mmx3_MMXRegisterEnumValue = (( 3)) };
extern const MMXRegister  mmx4;
enum { mmx4_MMXRegisterEnumValue = (( 4)) };
extern const MMXRegister  mmx5;
enum { mmx5_MMXRegisterEnumValue = (( 5)) };
extern const MMXRegister  mmx6;
enum { mmx6_MMXRegisterEnumValue = (( 6)) };
extern const MMXRegister  mmx7;
enum { mmx7_MMXRegisterEnumValue = (( 7)) };

There are 8 64-bit registers from mm0 to mm7 in CPUs after MMX Pentium and Pentium II. But in fact, the MMX register and the floating-point number register are shared, that is, the floating-point number register and the MMX register cannot be used at the same time.

The register variables defined in the cpu/x86/vm/register_definitions_x86.cpp file are as follows:

// Definitions of the MMX register constants. Each constant is the register's
// enum value cast to the MMXRegister pointer type.
const MMXRegister  mnoreg = ((MMXRegister)mnoreg_MMXRegisterEnumValue);
const MMXRegister  mmx0 =   ((MMXRegister)mmx0_MMXRegisterEnumValue);
const MMXRegister  mmx1 =   ((MMXRegister)mmx1_MMXRegisterEnumValue);
const MMXRegister  mmx2 =   ((MMXRegister)mmx2_MMXRegisterEnumValue);
const MMXRegister  mmx3 =   ((MMXRegister)mmx3_MMXRegisterEnumValue);
const MMXRegister  mmx4 =   ((MMXRegister)mmx4_MMXRegisterEnumValue);
const MMXRegister  mmx5 =   ((MMXRegister)mmx5_MMXRegisterEnumValue);
const MMXRegister  mmx6 =   ((MMXRegister)mmx6_MMXRegisterEnumValue);
const MMXRegister  mmx7 =   ((MMXRegister)mmx7_MMXRegisterEnumValue);

When we need to use the MMX register, it is enough to reference through variables such as mmx0 and mmx1.

4. XMMRegisterImpl class

The XMM registers are used by SSE instructions. Pentium III and later CPUs provide 8 128-bit XMM registers, xmm0 to xmm7; in 64-bit mode there are 16, xmm0 to xmm15. There is also an mxcsr register, which represents the operating status of SSE instructions. In HotSpot VM, XMM registers are represented by the XMMRegisterImpl class. The definition of this class is as follows:

Source location: hotspot/src/cpu/x86/vm/register_x86.hpp

// XMMRegister is used as shorthand for a pointer to XMMRegisterImpl.
class XMMRegisterImpl;
typedef XMMRegisterImpl* XMMRegister;

// Represents an XMM register.
class XMMRegisterImpl: public AbstractRegisterImpl {
 public:
  enum {
    // Number of XMM registers.
    number_of_registers = 16
  };
  // ... (remaining members omitted in this excerpt)
};

The definition of XMM register is as follows:

// Declares one constant per XMM register together with its enum value:
// -1 for the "no register" marker xnoreg, 0..15 for xmm0..xmm15.
CONSTANT_REGISTER_DECLARATION(XMMRegister, xnoreg , (-1));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm0 ,   ( 0));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm1 ,   ( 1));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm2 ,   ( 2));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm3 ,   ( 3));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm4 ,   ( 4));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm5 ,   ( 5));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm6 ,   ( 6));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm7 ,   ( 7));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm8,      (8));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm9,      (9));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm10,    (10));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm11,    (11));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm12,    (12));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm13,    (13));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm14,    (14));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm15,    (15));

After macro expansion:

// Result of expanding CONSTANT_REGISTER_DECLARATION for every XMM register:
// an extern constant plus an anonymous enum carrying its numeric value.
// Each enum is terminated by the semicolon that follows the macro invocation.
extern const XMMRegister  xnoreg;
enum { xnoreg_XMMRegisterEnumValue = ((-1)) };
extern const XMMRegister  xmm0;
enum { xmm0_XMMRegisterEnumValue = (( 0)) };
extern const XMMRegister  xmm1;
enum { xmm1_XMMRegisterEnumValue = (( 1)) };
extern const XMMRegister  xmm2;
enum { xmm2_XMMRegisterEnumValue = (( 2)) };
extern const XMMRegister  xmm3;
enum { xmm3_XMMRegisterEnumValue = (( 3)) };
extern const XMMRegister  xmm4;
enum { xmm4_XMMRegisterEnumValue = (( 4)) };
extern const XMMRegister  xmm5;
enum { xmm5_XMMRegisterEnumValue = (( 5)) };
extern const XMMRegister  xmm6;
enum { xmm6_XMMRegisterEnumValue = (( 6)) };
extern const XMMRegister  xmm7;
enum { xmm7_XMMRegisterEnumValue = (( 7)) };
extern const XMMRegister  xmm8;
enum { xmm8_XMMRegisterEnumValue = ((8)) };
extern const XMMRegister  xmm9;
enum { xmm9_XMMRegisterEnumValue = ((9)) };
extern const XMMRegister  xmm10;
enum { xmm10_XMMRegisterEnumValue = ((10)) };
extern const XMMRegister  xmm11;
enum { xmm11_XMMRegisterEnumValue = ((11)) };
extern const XMMRegister  xmm12;
enum { xmm12_XMMRegisterEnumValue = ((12)) };
extern const XMMRegister  xmm13;
enum { xmm13_XMMRegisterEnumValue = ((13)) };
extern const XMMRegister  xmm14;
enum { xmm14_XMMRegisterEnumValue = ((14)) };
extern const XMMRegister  xmm15;
enum { xmm15_XMMRegisterEnumValue = ((15)) };

The register variables defined in the cpu/x86/vm/register_definitions_x86.cpp file are as follows:

// Definitions of the XMM register constants. Each constant is the register's
// enum value cast to the XMMRegister pointer type.
const XMMRegister  xnoreg = ((XMMRegister)xnoreg_XMMRegisterEnumValue);
const XMMRegister  xmm0 =   ((XMMRegister)xmm0_XMMRegisterEnumValue);
const XMMRegister  xmm1 =   ((XMMRegister)xmm1_XMMRegisterEnumValue);
const XMMRegister  xmm2 =   ((XMMRegister)xmm2_XMMRegisterEnumValue);
const XMMRegister  xmm3 =   ((XMMRegister)xmm3_XMMRegisterEnumValue);
const XMMRegister  xmm4 =   ((XMMRegister)xmm4_XMMRegisterEnumValue);
const XMMRegister  xmm5 =   ((XMMRegister)xmm5_XMMRegisterEnumValue);
const XMMRegister  xmm6 =   ((XMMRegister)xmm6_XMMRegisterEnumValue);
const XMMRegister  xmm7 =   ((XMMRegister)xmm7_XMMRegisterEnumValue);
const XMMRegister  xmm8 =   ((XMMRegister)xmm8_XMMRegisterEnumValue);
const XMMRegister  xmm9 =   ((XMMRegister)xmm9_XMMRegisterEnumValue);
const XMMRegister  xmm10 =  ((XMMRegister)xmm10_XMMRegisterEnumValue);
const XMMRegister  xmm11 =  ((XMMRegister)xmm11_XMMRegisterEnumValue);
const XMMRegister  xmm12 =  ((XMMRegister)xmm12_XMMRegisterEnumValue);
const XMMRegister  xmm13 =  ((XMMRegister)xmm13_XMMRegisterEnumValue);
const XMMRegister  xmm14 =  ((XMMRegister)xmm14_XMMRegisterEnumValue);
const XMMRegister  xmm15 =  ((XMMRegister)xmm15_XMMRegisterEnumValue);

When we need to use XMM registers, we can directly reference them through variables such as xmm0 and xmm1.

Chapter 18-Commonly Used Instructions of the x86 Instruction Set

The x86 instruction set can be divided into the following 4 types:

  1. General instructions
  2. x87 FPU instruction, instruction for floating-point number arithmetic
  3. SIMD instruction is SSE instruction
  4. System instructions, special instructions used when writing OS kernel

Here are some general instructions. The instruction consists of a mnemonic (mnemonic) that identifies the type of the command and an operand (operand) as a parameter. For example, the move command:

| Instruction | Operands | Description |
| --- | --- | --- |
| movq | I/R/M, R/M | Copy 1 quadword (64 bits, 8 bytes) of data from the source operand to the destination operand |
| movl | I/R/M, R/M | Copy 1 doubleword (32 bits, 4 bytes) of data from the source operand to the destination operand |
| movw | I/R/M, R/M | Copy 1 word (16 bits, 2 bytes) of data from the source operand to the destination operand |
| movb | I/R/M, R/M | Copy 1 byte (8 bits) of data from the source operand to the destination operand |

movl is a mnemonic. The mnemonic has a suffix. For example, the suffix l in movl indicates the data size of the object as the operand. l is the abbreviation of long, which represents the size of 32 bits. In addition, b, w, and q represent the size of 8 bits, 16 bits, and 64 bits, respectively.

If the operand of the instruction is more than one, separate each operand with a comma. Each operand will indicate whether it can be an immediate mode value (I), register (R) or memory address (M).

In addition, I should also remind you that in x86 assembly language, there can only be one operand using a memory location. For example, it is impossible to have mov M, M instructions.

Each operation in the general register can have a character suffix to indicate the size of the operand, as shown in the following table.

| C declaration | Suffix | Size (bytes) |
| --- | --- | --- |
| char | b | 1 |
| short | w | 2 |
| (unsigned) int / long / char* | l | 4 |
| float | s | 4 |
| double | l | 8 |
| long double | t | 10/12 |

Note: The suffix "l" is used for both 4-byte integers and 8-byte double-precision floating-point numbers. This does not cause ambiguity, because floating-point numbers use completely different instructions and registers.

Later we will only introduce instructions such as call and push without suffixes. If you encounter callq, pushq, and similar instructions when studying the assembly of the HotSpot VM, don't be confused: the suffix simply indicates the size of the operand.

The following table shows the format and addressing mode of the operands.

| Format | Operand value | Name | Example (operand = C expression) |
| --- | --- | --- | --- |
| `$Imm` | `Imm` | Immediate addressing | `$1` = `1` |
| `Ea` | `R[Ea]` | Register addressing | `%eax` = `eax` |
| `Imm` | `M[Imm]` | Absolute addressing | `0x104` = `*0x104` |
| `(Ea)` | `M[R[Ea]]` | Indirect addressing | `(%eax)` = `*eax` |
| `Imm(Ea)` | `M[Imm+R[Ea]]` | (Base address + offset) addressing | `4(%eax)` = `*(4+eax)` |
| `(Ea,Eb)` | `M[R[Ea]+R[Eb]]` | Indexed addressing | `(%eax,%ebx)` = `*(eax+ebx)` |
| `Imm(Ea,Eb)` | `M[Imm+R[Ea]+R[Eb]]` | Indexed addressing | `9(%eax,%ebx)` = `*(9+eax+ebx)` |
| `(,Ea,s)` | `M[R[Ea]*s]` | Scaled indexed addressing | `(,%eax,4)` = `*(eax*4)` |
| `Imm(,Ea,s)` | `M[Imm+R[Ea]*s]` | Scaled indexed addressing | `0xfc(,%eax,4)` = `*(0xfc+eax*4)` |
| `(Ea,Eb,s)` | `M[R[Ea]+R[Eb]*s]` | Scaled indexed addressing | `(%eax,%ebx,4)` = `*(eax+ebx*4)` |
| `Imm(Ea,Eb,s)` | `M[Imm+R[Ea]+R[Eb]*s]` | Scaled indexed addressing | `8(%eax,%ebx,4)` = `*(8+eax+ebx*4)` |

Note: M[xx] represents the value of the address xx in the memory, and R[xx] represents the value of the register xx. This representation method shows the register and memory in the form of a large array.

According to different compilers, there are two writing formats for assembly:

(1) Intel: Windows faction
(2) AT&T: Unix faction

Here is a brief introduction to the difference between the two.

Let's get to know the commonly used commands.

Below we give the AT&T assembly writing method, the two writing methods have the following differences.

1. Data transfer instructions

Transfer data from one place to another.

1.1 mov instruction

We describe the mov instruction in more detail than the others, because it is the most frequently occurring instruction and its mnemonic takes the largest variety of suffixes.

There are 3 forms of the mov instruction, as follows:

mov   # ordinary move instruction
movs  # sign-extending move: sign-extends the source operand and transfers it to a 64-bit register or memory location; movs stands for sign extension
movz  # zero-extending move: zero-extends the source operand and transfers it to a 64-bit register or memory location; movz stands for zero extension

There is a letter after the mov instruction to indicate the size of the operand, in the following form:

movb # copies 1 byte
movw # copies 2 bytes
movl # copies 4 bytes
movq # copies 8 bytes

There is another instruction, as follows:

movabsq  I,R

Unlike movq, whose immediate operand is limited to 32 bits (sign-extended to 64 bits), movabsq can load a full 64-bit immediate value directly into a 64-bit register.

The form of the movs instruction is as follows:

movsbw # sign-extends 1 byte and copies it into 2 bytes
movsbl # sign-extends 1 byte and copies it into 4 bytes
movsbq # sign-extends 1 byte and copies it into 8 bytes
movswl # sign-extends 2 bytes and copies them into 4 bytes
movswq # sign-extends 2 bytes and copies them into 8 bytes
movslq # sign-extends 4 bytes and copies them into 8 bytes

The form of the movz instruction is as follows:

movzbw # zero-extends 1 byte and copies it into 2 bytes
movzbl # zero-extends 1 byte and copies it into 4 bytes
movzbq # zero-extends 1 byte and copies it into 8 bytes
movzwl # zero-extends 2 bytes and copies them into 4 bytes
movzwq # zero-extends 2 bytes and copies them into 8 bytes
movzlq # zero-extends 4 bytes and copies them into 8 bytes

An example is as follows:

movl   %ecx,%eax
movl   (%ecx),%eax

The first instruction copies the value in the register ecx to the eax register; the second instruction uses the data in the ecx register as an address to access the memory, and loads the data in the memory into the eax register.

1.2 cmov instruction

The format of the cmov instruction is as follows:

cmovxx

Where xx represents one or more letters, which indicate the conditions that will trigger the transfer operation. The condition depends on the current value of the EFLAGS register.

The bits of the eflags register are shown in the figure below.

Among them, the bits in the eflags register that are relevant to the cmov instructions are CF (the operation produced a carry or borrow), OF (the signed integer result is too large or too small to be represented), PF (parity flag: set when the low byte of the result contains an even number of 1 bits), SF (sign flag: set when the result is negative), and ZF (the result is zero).

The following table shows unsigned conditional transfer instructions.

| Instruction pair | Description | eflags status |
| --- | --- | --- |
| cmova/cmovnbe | Greater than/not less than or equal to | (CF or ZF)=0 |
| cmovae/cmovnb | Greater than or equal to/not less than | CF=0 |
| cmovnc | No carry | CF=0 |
| cmovb/cmovnae | Less than/not greater than or equal to | CF=1 |
| cmovc | Carry | CF=1 |
| cmovbe/cmovna | Less than or equal to/not greater than | (CF or ZF)=1 |
| cmove/cmovz | Equal to/zero | ZF=1 |
| cmovne/cmovnz | Not equal to/not zero | ZF=0 |
| cmovp/cmovpe | Parity/even parity | PF=1 |
| cmovnp/cmovpo | Non-parity/odd parity | PF=0 |

Unsigned conditional transfer instructions rely on carry, zero, and parity flags to determine the difference between two operands.

The following table shows the signed conditional transfer instructions.

| Instruction pair | Description | eflags status |
| --- | --- | --- |
| cmovge/cmovnl | Greater than or equal to/not less than | (SF XOR OF)=0 |
| cmovl/cmovnge | Less than/not greater than or equal to | (SF XOR OF)=1 |
| cmovle/cmovng | Less than or equal to/not greater than | ((SF XOR OF) or ZF)=1 |
| cmovo | Overflow | OF=1 |
| cmovno | No overflow | OF=0 |
| cmovs | Signed (negative) | SF=1 |
| cmovns | Not signed (non-negative) | SF=0 |

An example is as follows:

// Load the value of `value` into the ecx register
movl value,%ecx 
// Use cmp to compare the values in ecx and ebx: it subtracts ebx from ecx and then sets eflags
cmp %ebx,%ecx
// If the value in ecx is greater than the value in ebx, cmova sets ebx to the value in ecx
cmova %ecx,%ebx 

Note that in AT&T syntax the source operand is written first and the destination operand last.

1.3 push and pop instructions

The form of the push command is shown in the table below.

instructionOperanddescribe
pushI/R/MThe PUSH instruction first reduces the value of ESP, and then copies the source operand to the stack. If the operand is 16 bits, then ESP is reduced by 2, and if the operand is 32 bits, then ESP is reduced by 4
pusha The instructions sequentially (AX, CX, DX, BX, SP, BP, SI, and DI) push 16-bit general-purpose registers onto the stack.
pushad The instructions follow the order of EAX, ECX, EDX, EBX, ESP (value before PUSHAD), EBP, ESI, and EDI to push all 32-bit general-purpose registers onto the stack.

The format of the pop instruction is shown in the following table.

instructionOperanddescribe
popR/MThe instruction first copies the contents of the stack element pointed to by ESP to a 16-bit or 32-bit destination operand, and then increases the value of ESP. If the operand is 16-bit, ESP is increased by 2. If the operand is 32-bit, ESP is increased by 4
popa The instruction pops the same register from the stack in reverse order
popad The instruction pops the same register from the stack in reverse order

1.4 xchg and xchgl

This instruction exchanges the values of its operands. XCHG exchanges the contents of two registers, or of a register and a memory variable. The two operands must have the same data type, which can be a byte, a word, or a double word. The format is as follows:

xchg    R/M,R/M
xchgl   I/R,I/R

Both operands cannot be memory variables at the same time. The xchgl instruction is an old x86 instruction. Its function is to exchange the 4-byte values in two registers or memory addresses. Both values cannot be memory addresses, and it will not set the condition code.

1.5 lea

Lea calculates the actual address of the source operand and saves the result to the destination operand, which must be a general-purpose register. The format is as follows:

lea M,R

The lea (Load Effective Address) instruction loads the address into the register.

Examples are as follows:

movl  4(%ebx),%eax
leal  4(%ebx),%eax  

The first instruction indicates that the result obtained by adding 4 to the value stored in the ebx register is accessed as the memory address, and the data stored in the memory address is loaded into the eax register.

The second instruction indicates that the result of adding 4 to the value stored in the ebx register is stored in the eax register as a memory address.

To give another example, as follows:

leaq a(b, c, d), %rax 

This computes the address a + b + c * d and loads the result into the rax register. Note that lea only performs the address calculation and does not access the memory at that address, so it can also be used as a cheap addition/multiplication instruction.

2. Arithmetic operation instructions

The following describes the basic arithmetic instructions that operate on signed and unsigned integers.

2.1 add and adc instructions

The format of the instruction is as follows:

add  I/R/M,R/M
adc  I/R/M,R/M

The instruction adds two operands, and the result is stored in the second operand.

For the first instruction, because both registers and memory have bit width restrictions, overflow may occur during addition operations. If the operation overflows, the carry flag (CF) in the flag register eflags will be set to 1.

For the second instruction, using the adc instruction plus the carry flag eflags.CF, you can add 64-bit data on a 32-bit machine.

Conventional arithmetic and logic instructions only need to extend the original IA-32 instructions to 64 bits. For example, addq is the addition of quadwords.

2.2 sub and sbb instructions

The format of the instruction is as follows:

sub I/R/M,R/M
sbb I/R/M,R/M

The instruction will subtract the first operand from the second operand, and the result will be stored in the second operand.

2.3 imul and mul instructions

The format of the instruction is as follows:

imul I/R/M,R
mul  I/R/M,R

Multiply the first operand by the second operand and write the result to the second operand. If the second operand is omitted, it defaults to the eax register, and the complete result is stored in edx:eax. (Note that mul only supports this one-operand form.)

The first instruction performs signed multiplication, and the second instruction performs unsigned multiplication.

2.4 idiv and div instructions

The format of the instruction is as follows:

div   R/M
idiv  R/M

The first instruction performs unsigned division, and the second instruction performs signed division. The dividend is formed by splicing the edx register and the eax register, the divisor is specified by the first operand of the instruction, the calculated quotient is stored in the eax register, and the remainder is stored in the edx register. As shown below.

    edx:eax
------------ = eax (quotient) ... edx (remainder)
    divisor

The bit width of the dividend, quotient, and divisor data is different during operation. The following table shows the registers used by the idiv instruction and the div instruction.

| Data width | Dividend | Divisor | Quotient | Remainder |
| --- | --- | --- | --- | --- |
| 8 bits | ax | The first operand of the instruction | al | ah |
| 16 bits | dx:ax | The first operand of the instruction | ax | dx |
| 32 bits | edx:eax | The first operand of the instruction | eax | edx |

The idiv and div instructions usually divide a dividend whose bit width is twice that of the divisor. For example, on an x86-32 machine a general-purpose register is 32 bits wide and a single register cannot hold 64 bits of data, so edx stores the high 32 bits of the dividend and eax stores the low 32 bits.

Therefore, before a division is performed, the 32-bit value in the eax register must be extended to 64 bits across edx:eax: sign-extended for signed numbers and zero-extended for unsigned numbers.

You can use cltd (AT&T style writing) or cdq (Intel style writing) for sign extension of edx. The format of the instruction is as follows:

cltd  // sign-extends the data in the eax register into edx:eax

cltd sign-extends the data in the eax register into edx:eax.

2.5 incl and decl instructions

The format of the instruction is as follows:

inc  R/M
dec  R/M 

Add 1 or subtract 1 to the data stored in the register or memory location specified by the first operand of the instruction.

2.6 negl instruction

The format of the instruction is as follows:

neg R/M

The neg instruction reverses the sign of the first operand.

3. Bit operation instructions

3.1 andl, orl and xorl instructions

The format of the instruction is as follows:

and  I/R/M,R/M
or   I/R/M,R/M
xor  I/R/M,R/M

The and instruction performs a bitwise AND operation between the second operand and the first operand, and writes the result into the second operand;

The or instruction performs a bitwise OR operation between the second operand and the first operand, and writes the result into the second operand;

The xor instruction performs a bitwise XOR operation between the second operand and the first operand, and writes the result to the second operand;

3.2 not command

The format of the instruction is as follows:

not R/M

Invert the operand in bits and write the result to the operand.

3.3 sal, sar, shr instructions

The format of the instruction is as follows:

sal  I/%cl,R/M  #算术左移
sar  I/%cl,R/M  #算术右移
shl  I/%cl,R/M  #逻辑左移
shr  I/%cl,R/M  #逻辑右移

The sal instruction shifts the second operand to the left by the number of bits specified by the first operand and writes the result into the second operand. The vacated low bits are filled with 0. The first operand can only be an 8-bit immediate value or the cl register, and only its lower 5 bits are meaningful (for 32-bit operands): a count of 32 or more would shift all of the data out of the register and is therefore meaningless.

The sar instruction shifts the second operand to the right according to the number of bits specified by the first operand, and writes the result into the second operand. The space after the shift is sign extended. Like the sal instruction, the first operand of the sar instruction must also be an 8-bit immediate or cl register, and only the lower 5 bits of data are meaningful.

The shl instruction behaves exactly the same as the sal instruction; there is no need to distinguish between them.

The shr command shifts the second operand to the right according to the number of bits specified by the first operand, and writes the result to the second operand. The space after the shift is zero-extended. Like the sal instruction, the first operand of the shr instruction must also be an 8-bit immediate value or cl register, and only the lower 5 bits of data are meaningful.

4. Process control instructions

4.1 jmp instruction

The format of the instruction is as follows:

jmp I/R

The jmp instruction unconditionally transfers control to the destination address specified by the operand. It can be regarded as an instruction that sets the instruction pointer (the eip register). The destination can also be written as an asterisk followed by a register, which makes it an indirect jump. For example:

jmp *%eax

Jump the program to the address contained in eax.

4.2 Conditional jump instructions

The format of the conditional jump instruction is as follows:

Jcc  destination address

Where cc refers to the jump condition, if it is true, the program jumps to the destination address; otherwise, the next instruction is executed. The related conditional jump instructions are shown in the following table.

instructionJump conditiondescribeinstructionJump conditiondescribe
jzZF=1Jump when 0jbeCF=1 or ZF=1Jump when less than or equal to
jnzZF=0Jump when it is not 0jnbeCF=0 and ZF=0Jump when not less than or equal to
jeZF=1Jump when equaljgZF=0 and SF=OFJump when greater than
jneZF=0Jump when not equaljngZF=1 or SF!=OFJump if not greater than
jaCF=0 and ZF=0Jump when greater thanjgeSF=OFJump when greater than or equal to
jnaCF=1 or ZF=1Jump if not greater thanjngeSF!=OFJump when not greater than or equal to
jaeCF=0Jump when greater than or equal tojlSF!=OFJump when less than
jnaeCF=1Jump when not greater than or equal tojnlSF=OFJump when not less than
jbCF=1Jump when less thanjleZF=1 or SF!=OFJump when less than or equal to
jnbCF=0Jump if not less thanjnleZF=0 and SF=OFJump when not less than or equal to

4.3 cmp instruction

The format of the cmp command is as follows:

cmp I/R/M,R/M

The cmp instruction compares the difference between the second operand and the first operand, and sets the flag bit in the flag register eflags according to the result. The cmp instruction is similar to the sub instruction, but the cmp instruction does not change the value of the operand.

The relationship between the operand and the flag bit set is shown in the table.

The relationship between operandsCFZFOF
The first operand is less than the second operand00SF
The first operand is equal to the second operand010
The first operand is greater than the second operand10not SF

4.4 test instruction

The format of the instruction is as follows:

test I/R/M,R/M

The instruction computes the logical AND of the first operand and the second operand, and sets the flag bits in the flag register eflags according to the result. The test instruction is essentially the same as the and instruction, except that the test instruction does not change the value of the operand.

After the test instruction is executed, CF and OF are usually cleared, and ZF and SF are set according to the result of the operation. When the result of the operation is zero, ZF is set to 1, and SF has the same value as the highest bit.

An example is as follows:

The test command can check several bits at the same time. Suppose you want to know whether bit 0 and bit 3 of the AL register are set to 1, you can use the following command:

test al,00001001b    # the mask is 0000 1001; tests whether bit 0 and bit 3 are set to 1

From the following data set example, it can be inferred that the zero flag bit is set only when all test bits are cleared:

0  0  1  0  0  1  0  1    <- input value
0  0  0  0  1  0  0  1    <- test value
0  0  0  0  0  0  0  1    <- result: ZF=0

0  0  1  0  0  1  0  0    <- input value
0  0  0  0  1  0  0  1    <- test value
0  0  0  0  0  0  0  0    <- result: ZF=1

The test instruction always clears the overflow and carry flags, and the method of modifying the sign flag, zero flag and parity flag is the same as the AND instruction.

4.5 setcc instruction

Set the target operand to 0 or 1 according to the status flags (CF, SF, OF, ZF and PF) in eflags. The target operand here points to a byte register (that is, an 8-bit register, such as AL, BL, CL) or a byte in memory. The status code suffix (cc) indicates the condition to be tested.

The format of the instruction to get the flag bit is as follows:

setcc R/M

The instruction sets the operand to 0 or 1 according to the value of the flag register eflags.

The cc in setcc is similar to the cc in Jcc, please refer to the table.

4.6 call instruction

The format of the instruction is as follows:

call I/R/M

The call instruction will call the function specified by the operand. The call instruction pushes the address of the next instruction to the stack, and then jumps to the address specified by the operand, so that the function can return from the sub-function by jumping to the address on the stack. Equivalent to

push %eip
jmp addr

First push the address of the next instruction onto the stack, and then jump to the target address addr.

4.7 ret instruction

The format of the instruction is as follows:

ret

The ret instruction is used to return from a sub-function. In Linux of the X86 architecture, the return value of the function is set to the eax register and returned. It is equivalent to the following command:

popl %eip

Pop the address of the instruction that follows the call instruction (which the call instruction pushed onto the stack) and set the instruction pointer to it. In this way, the program correctly returns to the place where the sub-function was called.

Physically speaking, the CALL instruction pushes its return address onto the stack, and then copies the address of the called procedure to the instruction pointer register. When the procedure is ready to return, its RET instruction pops the return address from the stack back into the instruction pointer register.

4.8 enter instruction

The enter instruction establishes the stack frame required by the function parameters and local variables for the function by initializing the ebp and esp registers. Equivalent to

push   %rbp
mov    %rsp,%rbp

4.9 leave instruction

leave removes the stack frame created with the enter instruction by restoring the ebp and esp registers. Equivalent to

mov %rbp, %rsp
pop %rbp

Point the stack pointer to the frame pointer, and then pop the backup original frame pointer to %ebp

4.10 int instruction

The format of the instruction is as follows:

int I

Cause an interrupt for the given number. This is usually used for system calls and other kernel interfaces.

5. Flag operations

The flags of the eflags register are shown in the figure below.

Some instructions for operating the eflags register flags are shown in the following table.

instructionOperanddescribe
pushfdRThe PUSHFD instruction pushes the contents of the 32-bit EFLAGS register onto the stack
popfdRThe POPFD instruction pops the contents of the top cell of the stack to the EFLAGS register
 cld Set eflags.df to 0

Chapter 19-Load and Store Instructions (1)

The TemplateInterpreterGenerator::generate_all() function generates many routines (that is, machine instruction fragments, called Stub in English), including calling the set_entry_points_for_all_bytes() function to generate routines corresponding to each bytecode.

Eventually, it will call the TemplateInterpreterGenerator::generate_and_dispatch() function. The call stack is as follows:

TemplateTable::generate()                               templateTable_x86_64.cpp
TemplateInterpreterGenerator::generate_and_dispatch()   templateInterpreter.cpp    
TemplateInterpreterGenerator::set_vtos_entry_points()   templateInterpreter_x86_64.cpp    
TemplateInterpreterGenerator::set_short_entry_points()  templateInterpreter.cpp
TemplateInterpreterGenerator::set_entry_points()        templateInterpreter.cpp
TemplateInterpreterGenerator::set_entry_points_for_all_bytes()   templateInterpreter.cpp    
TemplateInterpreterGenerator::generate_all()            templateInterpreter.cpp
InterpreterGenerator::InterpreterGenerator()            templateInterpreter_x86_64.cpp    
TemplateInterpreter::initialize()                       templateInterpreter.cpp
interpreter_init()                                      interpreter.cpp
init_globals()                                          init.cpp

Many functions on the call stack have been introduced before. Each bytecode specifies a generator function, which is saved in the _gen attribute of the Template and is called in the TemplateTable::generate() function. _gen generates the machine instruction fragment corresponding to each bytecode, so it is very important.

First look at a very simple bytecode instruction, nop. The template attributes of this instruction are as follows:

// Java spec bytecodes  ubcp|disp|clvm|iswd  in    out   generator   argument
def(Bytecodes::_nop   , ____|____|____|____, vtos, vtos, nop        ,  _      );

The generator function of the nop bytecode instruction does not generate any machine instructions, so the assembly code corresponding to the nop bytecode instruction contains only the logic of the stack-top cache. The assembly code generated by calling the set_vtos_entry_points() function is as follows:

// aep
0x00007fffe1027c00: push   %rax
0x00007fffe1027c01: jmpq   0x00007fffe1027c30

// fep
0x00007fffe1027c06: sub    $0x8,%rsp
0x00007fffe1027c0a: vmovss %xmm0,(%rsp)
0x00007fffe1027c0f: jmpq   0x00007fffe1027c30

// dep
0x00007fffe1027c14: sub    $0x10,%rsp
0x00007fffe1027c18: vmovsd %xmm0,(%rsp)
0x00007fffe1027c1d: jmpq   0x00007fffe1027c30

// lep
0x00007fffe1027c22: sub    $0x10,%rsp
0x00007fffe1027c26: mov    %rax,(%rsp)
0x00007fffe1027c2a: jmpq   0x00007fffe1027c30

// bep cep sep iep
0x00007fffe1027c2f: push   %rax

// vep

// Next comes the instruction-fetch logic, which starts at address 0x00007fffe1027c30

It can be seen that since tos_in is vtos, if it is aep, bep, cep, sep, and iep, directly use the push instruction to push the stack top cache value stored in %rax into the expression stack. For fep, dep, and lep, the corresponding memory size is opened on the stack, and then the value in the register is stored on the top of the expression stack, which has the same effect as the push instruction.

In the set_vtos_entry_points() function, the generate_and_dispatch() function is called to generate the machine instruction fragment of the nop instruction and the machine instruction fragment of the next bytecode instruction. nop will not generate any machine instructions, and the fragments for fetching are as follows:

// movzbl moves a zero-extended byte into a doubleword; this instruction is at address 0x00007fffe1027c30
0x00007fffe1027c30: movzbl  0x1(%r13),%ebx       

0x00007fffe1027c35: inc %r13 

0x00007fffe1027c38: movabs $0x7ffff73ba4a0,%r10 

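// The source operand of movabs can only be an immediate value or a label (which is essentially an immediate value); the destination operand is a register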
0x00007fffe1027c42: jmpq *(%r10,%rbx,8)

%r13 points to the address of the bytecode instruction currently being executed (here, nop). The movzbl instruction loads the opcode at %r13+1, that is, the opcode of the next bytecode instruction, into %ebx with zero extension. The inc instruction then advances %r13 past the current nop instruction so that it points to the address of the next bytecode instruction.

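The jmpq instruction jumps to the address stored in the memory location %r10+%rbx*8, that is, the 8-byte table entry selected by the opcode in %rbx, with the base address of the table held in %r10. The jump address has been introduced in detail before, so I won't introduce it here.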

We explained the nop instruction and reviewed the logic of the stack-top cache and the instruction fetching logic. For each bytecode instruction, there will be a stack-top cache and instruction fetching logic, which will be discussed later when introducing bytecode instructions. I won't introduce these two logics again.

The bytecode instructions for loading and storing operations are shown in the following table.

BytecodeMnemonicInstruction meaning
0x00nopdo nothing
0x01aconst_null    Push null to the top of the stack
0x02iconst_m1Push int -1 to the top of the stack
0x03iconst_0Push the int type 0 to the top of the stack
0x04iconst_1Push the int type 1 to the top of the stack
0x05iconst_2Push the int type 2 to the top of the stack
0x06iconst_3Push the int type 3 to the top of the stack
0x07iconst_4Push the int type 4 to the top of the stack
0x08iconst_5Push the int type 5 to the top of the stack
0x09lconst_0Push long 0 to the top of the stack
0x0alconst_1Push long type 1 to the top of the stack
0x0bfconst_0Push float 0 to the top of the stack
0x0cfconst_1Push float type 1 to the top of the stack
0x0dfconst_2Push float 2 to the top of the stack
0x0edconst_0Push double type 0 to the top of the stack
0x0fdconst_1Push double type 1 to the top of the stack
0x10bipushPush the single-byte constant value (-128~127) to the top of the stack
0x11sipushPush a short integer constant value (-32768~32767) to the top of the stack
0x12ldcPush int, float or String constant values from the constant pool to the top of the stack
0x13ldc_wPush the constant value of int, float or String type from the constant pool to the top of the stack (wide index)
0x14ldc2_wPush the long or double constant value from the constant pool to the top of the stack (wide index)
0x15iloadPush the specified int type local variable to the top of the stack
0x16lloadPush the specified long type local variable to the top of the stack
0x17floadPush the specified float type local variable to the top of the stack
0x18dloadPush the specified double type local variable to the top of the stack
0x19aloadPush the specified reference type local variable to the top of the stack
0x1aiload_0Push the first int type local variable to the top of the stack
0x1biload_1Push the second int type local variable to the top of the stack
0x1ciload_2Push the third int type local variable to the top of the stack
0x1diload_3Push the fourth int type local variable to the top of the stack
0x1elload_0Push the first long local variable to the top of the stack
0x1flload_1Push the second long local variable to the top of the stack
0x20lload_2Push the third long local variable to the top of the stack
0x21lload_3Push the fourth long local variable to the top of the stack
0x22fload_0Push the first float type local variable to the top of the stack
0x23fload_1Push the second float type local variable to the top of the stack
0x24fload_2Push the third float type local variable to the top of the stack
0x25fload_3Push the fourth float type local variable to the top of the stack
0x26dload_0Push the first double type local variable to the top of the stack
0x27dload_1Push the second double local variable to the top of the stack
0x28dload_2Push the third double local variable to the top of the stack
0x29dload_3Push the fourth double type local variable to the top of the stack
0x2aaload_0Push the first reference type local variable to the top of the stack
0x2baload_1Push the second reference type local variable to the top of the stack
0x2caload_2Push the third reference type local variable to the top of the stack
0x2daload_3Push the fourth reference type local variable to the top of the stack
0x2eialoadPush the value of the specified index of the int type array to the top of the stack
0x2flaloadPush the value of the specified index of the long array to the top of the stack
0x30faloadPush the value of the specified index of the float type array to the top of the stack
0x31daloadPush the value of the specified index of the double array to the top of the stack
0x32aaloadPush the value of the specified index of the reference array to the top of the stack
0x33baloadPush the value of the specified index of the boolean or byte array to the top of the stack
0x34caloadPush the value of the specified index of the char array to the top of the stack
0x35saloadPush the value of the specified index of the short array to the top of the stack
0x36istoreStore the int type value on the top of the stack into the specified local variable
0x37lstoreStore the long value on the top of the stack into the specified local variable
0x38fstoreStore the float value on the top of the stack into the specified local variable
0x39dstoreStore the double value on the top of the stack into the specified local variable
0x3aastoreStore the reference value on the top of the stack into the specified local variable
0x3bistore_0Store the int value on the top of the stack into the first local variable
0x3cistore_1Store the int value of the top of the stack into the second local variable
0x3distore_2Store the int value on the top of the stack into the third local variable
0x3eistore_3Store the int value of the top of the stack into the fourth local variable
0x3flstore_0Store the long value on the top of the stack into the first local variable
0x40lstore_1Store the long value on the top of the stack into the second local variable
0x41lstore_2Store the long value on the top of the stack into the third local variable
0x42lstore_3Store the long value on the top of the stack into the fourth local variable
0x43fstore_0Store the float value on the top of the stack into the first local variable
0x44fstore_1Store the float value on the top of the stack into the second local variable
0x45fstore_2Store the float value on the top of the stack into the third local variable
0x46fstore_3Store the float value on the top of the stack into the fourth local variable
0x47dstore_0Store the double value on the top of the stack into the first local variable
0x48dstore_1Store the double value on the top of the stack into the second local variable
0x49dstore_2Store the double value on the top of the stack into the third local variable
0x4adstore_3Store the double value on the top of the stack into the fourth local variable
0x4bastore_0Store the reference value on the top of the stack into the first local variable
0x4castore_1Store the reference value on the top of the stack into the second local variable
0x4dastore_2Store the reference value on the top of the stack into the third local variable
0x4eastore_3Store the reference value on the top of the stack into the fourth local variable
0x4fiastoreStore the top int value of the stack in the specified index position of the specified array
0x50lastoreStore the long value on the top of the stack into the specified index position of the specified array
0x51fastoreStore the float value on the top of the stack into the specified index position of the specified array
0x52dastoreStore the double value on the top of the stack into the specified index position of the specified array
0x53aastoreStore the top reference value of the stack in the specified index position of the specified array
0x54bastoreStore the boolean or byte value at the top of the stack into the specified index position of the specified array
0x55castoreStore the top char value of the stack into the specified index position of the specified array
0x56sastoreStore the short value on the top of the stack into the specified index position of the specified array
0xc4wideThe instruction to expand the access index of the local variable table

We will not examine the logic of the machine instruction fragment for every bytecode instruction (in practice, the machine instruction fragment is disassembled and its execution logic is understood by reading the assembly). The logic of some instructions is similar, so here we only introduce a few typical ones.

1. Push type instructions

(1) aconst_null instruction

aconst_null means sending null to the top of the stack. The template is defined as follows:

def(Bytecodes::_aconst_null , ____|____|____|____, vtos, atos, aconst_null  ,  _ );

The assembly code of the instruction is as follows:

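// The xor instruction performs a logical exclusive-OR between the corresponding bits of the two operands and stores the result in the destination operand
// When the first operand and the second operand are the same, the exclusive-OR is equivalent to clearing the operand to zero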
xor    %eax,%eax 

Since tos_out is atos, the result at the top of the stack is cached in the %eax register, and only the xor operation is performed on the %eax register.

(2) iconst_m1 instruction

iconst_m1 means pushing -1 into the stack. The template is defined as follows:

def(Bytecodes::_iconst_m1 , ____|____|____|____, vtos, itos, iconst , -1 );

After the generated machine instructions are disassembled, the resulting assembly code is as follows:

mov    $0xffffffff,%eax 

Other bytecode instructions similar to the iconst_m1 bytecode instruction, such as iconst_0, iconst_1, etc., are defined as follows:

def(Bytecodes::_iconst_m1           , ____|____|____|____, vtos, itos, iconst              , -1           );
def(Bytecodes::_iconst_0            , ____|____|____|____, vtos, itos, iconst              ,  0           );
def(Bytecodes::_iconst_1            , ____|____|____|____, vtos, itos, iconst              ,  1           );
def(Bytecodes::_iconst_2            , ____|____|____|____, vtos, itos, iconst              ,  2           );
def(Bytecodes::_iconst_3            , ____|____|____|____, vtos, itos, iconst              ,  3           );
def(Bytecodes::_iconst_4            , ____|____|____|____, vtos, itos, iconst              ,  4           );
def(Bytecodes::_iconst_5            , ____|____|____|____, vtos, itos, iconst              ,  5           );

As you can see, the generating functions are all the same TemplateTable::iconst() function.

The assembly code of iconst_0 is as follows:

xor    %eax,%eax

The assembly code corresponding to the bytecode instructions iconst_@ (where @ is 1, 2, 3, 4, or 5) is as follows:

// aep  
0x00007fffe10150a0: push   %rax
0x00007fffe10150a1: jmpq   0x00007fffe10150d0

// fep
0x00007fffe10150a6: sub    $0x8,%rsp
0x00007fffe10150aa: vmovss %xmm0,(%rsp)
0x00007fffe10150af: jmpq   0x00007fffe10150d0

// dep
0x00007fffe10150b4: sub    $0x10,%rsp
0x00007fffe10150b8: vmovsd %xmm0,(%rsp)
0x00007fffe10150bd: jmpq   0x00007fffe10150d0

// lep
0x00007fffe10150c2: sub    $0x10,%rsp
0x00007fffe10150c6: mov    %rax,(%rsp)
0x00007fffe10150ca: jmpq   0x00007fffe10150d0

// bep/cep/sep/iep
0x00007fffe10150cf: push   %rax

// vep
0x00007fffe10150d0: mov $0x@,%eax // @ stands for 1, 2, 3, 4, or 5

If you have read the article I wrote before, then the above assembly code should be able to understand, I will not intro


HeapDump性能社区
442 声望693 粉丝

有性能问题,上HeapDump性能社区