This article is authored by HeapDump performance community chief lecturer Kumo (Ma Zhi) to collate and publish
Chapter 17-x86-64 Register
The machine-language system that a given family of CPUs can interpret is called an instruction set architecture (ISA), or simply an instruction set. Intel refers to the 32-bit instruction set architecture of its x86 series of CPUs as IA-32, where IA is the abbreviation of "Intel Architecture"; it is also called i386 or x86-32. AMD moved ahead of Intel in proposing a 64-bit extension of the x86 series, so the 64-bit instruction set architecture of the x86 series designed by AMD is called AMD64. Later, Intel added almost the same instruction set as AMD64 to its own CPUs, calling it the Intel 64 instruction set. AMD64 and Intel 64 are collectively referred to as x86-64.
In x86-64, the general-purpose registers are as wide as the machine word (data bus bit width), that is, 64 bits. x86-64 extends the 8 32-bit general-purpose registers of x86 (eax, ebx, ecx, edx, esi, edi, ebp, esp) to 64 bits, and adds 8 new 64-bit registers (r8-r15). The naming has also changed from "exx" to "rxx", but the "exx" names are still retained for 32-bit operations. The following table describes the naming and function of each register.
Description | 32-bit | 64-bit |
General-purpose registers | eax | rax |
 | ecx | rcx |
 | edx | rdx |
 | ebx | rbx |
 | esp | rsp |
 | ebp | rbp |
 | esi | rsi |
 | edi | rdi |
 | - | r8~r15 |
Floating-point registers | st0~st7 | st0~st7 |
XMM registers | XMM0~XMM7 | XMM0~XMM15 |
Among them, %esp and %ebp are used for special purposes to save pointers to specific locations in the program stack.
There is also the eflags register, which uses bits to express specific meanings, as shown in the figure below.
In HotSpot VM, the classes that represent registers are inherited from the AbstractRegisterImpl class. The definition of this class is as follows:
Source location: hotspot/src/share/vm/asm/register.hpp
// Forward declaration, so the pointer alias below can be introduced before the class body.
class AbstractRegisterImpl;
// AbstractRegister is the handle type: a pointer to AbstractRegisterImpl.
typedef AbstractRegisterImpl* AbstractRegister;
// Common base class of the classes that represent registers.
class AbstractRegisterImpl {
protected:
// Returns the register's integer value. No member data is read: the `this`
// pointer itself is reinterpreted as an integer (through intx, then narrowed
// to int), so the pointer value is what carries the register number.
int value() const { return (int)(intx)this; }
};
The inheritance system of the AbstractRegisterImpl class is shown in the figure below.
In addition, there is a ConcreteRegisterImpl class that also inherits from AbstractRegisterImpl. This class is related to the implementation of the C2 compiler, so I won't explain it in detail here.
1, RegisterImpl class
The RegisterImpl class is used to represent general-purpose registers. The definition of the class is as follows:
Source location: hotspot/src/cpu/x86/vm/register_x86.hpp
// Register is used as the shorthand name for a pointer to RegisterImpl.
class RegisterImpl;
typedef RegisterImpl* Register;
// Represents a general-purpose register.
class RegisterImpl: public AbstractRegisterImpl {
public:
enum {
// Number of general-purpose registers (the 64-bit build declares rax..r15).
number_of_registers = 16,
// Number of registers usable for byte-sized operands
// (NOTE(review): value shown is for the 64-bit build -- confirm against upstream).
number_of_byte_registers = 16
};
// ... (remaining members omitted)
};
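In 64-bit mode, the general-purpose registers are 64 bits wide, and the low 8 bits of each of the 16 general-purpose registers can be addressed as a byte register, so number_of_byte_registers is 16. (On 32-bit x86, only parts of eax, ebx, ecx, and edx can be used as 8-bit registers, so there the number of registers that can store bytes is 4.)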
Define the register in HotSpot VM as follows:
Source location: hotspot/src/cpu/x86/vm/register_x86.hpp
// Declares the general-purpose register constants. Each invocation declares an
// extern const Register with the given name and an enum constant
// <name>_RegisterEnumValue holding the number passed as the third argument.
// noreg (-1) is declared alongside the 16 real registers, which are numbered 0..15.
CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); // noreg_RegisterEnumValue = ((-1))
CONSTANT_REGISTER_DECLARATION(Register, rax, (0)); // rax_RegisterEnumValue = ((0))
CONSTANT_REGISTER_DECLARATION(Register, rcx, (1)); // rcx_RegisterEnumValue = ((1))
CONSTANT_REGISTER_DECLARATION(Register, rdx, (2)); // rdx_RegisterEnumValue = ((2))
CONSTANT_REGISTER_DECLARATION(Register, rbx, (3)); // rbx_RegisterEnumValue = ((3))
CONSTANT_REGISTER_DECLARATION(Register, rsp, (4)); // rsp_RegisterEnumValue = ((4))
CONSTANT_REGISTER_DECLARATION(Register, rbp, (5)); // rbp_RegisterEnumValue = ((5))
CONSTANT_REGISTER_DECLARATION(Register, rsi, (6)); // rsi_RegisterEnumValue = ((6))
CONSTANT_REGISTER_DECLARATION(Register, rdi, (7)); // rdi_RegisterEnumValue = ((7))
CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); // r8_RegisterEnumValue = ((8))
CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); // r9_RegisterEnumValue = ((9))
CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); // r10_RegisterEnumValue = ((10))
CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); // r11_RegisterEnumValue = ((11))
CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); // r12_RegisterEnumValue = ((12))
CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); // r13_RegisterEnumValue = ((13))
CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); // r14_RegisterEnumValue = ((14))
CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); // r15_RegisterEnumValue = ((15))
The macro CONSTANT_REGISTER_DECLARATION is defined as follows:
Source location: hotspot/src/share/vm/asm/register.hpp
// Declares one register constant: an extern const object `name` of handle type
// `type`, plus an anonymous-enum constant <name>_<type>EnumValue equal to
// `value` (for example rax_RegisterEnumValue). The macro body ends without a
// semicolon; the semicolon written after each invocation terminates the enum.
#define CONSTANT_REGISTER_DECLARATION(type, name, value) \
extern const type name; \
enum { name##_##type##EnumValue = (value) }
After macro expansion, it is as follows:
// Expansion of the CONSTANT_REGISTER_DECLARATION invocations for the
// general-purpose registers: one extern declaration plus one enum constant per
// register. Each enum is terminated by the semicolon that follows the macro
// invocation.
extern const Register noreg;
enum { noreg_RegisterEnumValue = ((-1)) };
extern const Register rax;
enum { rax_RegisterEnumValue = ((0)) };
extern const Register rcx;
enum { rcx_RegisterEnumValue = ((1)) };
extern const Register rdx;
enum { rdx_RegisterEnumValue = ((2)) };
extern const Register rbx;
enum { rbx_RegisterEnumValue = ((3)) };
extern const Register rsp;
enum { rsp_RegisterEnumValue = ((4)) };
extern const Register rbp;
enum { rbp_RegisterEnumValue = ((5)) };
extern const Register rsi;
enum { rsi_RegisterEnumValue = ((6)) };
extern const Register rdi;
enum { rdi_RegisterEnumValue = ((7)) };
extern const Register r8;
enum { r8_RegisterEnumValue = ((8)) };
extern const Register r9;
enum { r9_RegisterEnumValue = ((9)) };
extern const Register r10;
enum { r10_RegisterEnumValue = ((10)) };
extern const Register r11;
enum { r11_RegisterEnumValue = ((11)) };
extern const Register r12;
enum { r12_RegisterEnumValue = ((12)) };
extern const Register r13;
enum { r13_RegisterEnumValue = ((13)) };
extern const Register r14;
enum { r14_RegisterEnumValue = ((14)) };
extern const Register r15;
enum { r15_RegisterEnumValue = ((15)) };
The above enumeration class assigns a constant value to the register.
The registers defined in the cpu/x86/vm/register_definitions_x86.cpp file are as follows:
// Definitions of the general-purpose register constants. Each constant is the
// corresponding enum value cast to the Register pointer type, so the pointer
// value itself is the register number.
const Register noreg = ((Register)noreg_RegisterEnumValue);
const Register rax = ((Register)rax_RegisterEnumValue);
const Register rcx = ((Register)rcx_RegisterEnumValue);
const Register rdx = ((Register)rdx_RegisterEnumValue);
const Register rbx = ((Register)rbx_RegisterEnumValue);
const Register rsp = ((Register)rsp_RegisterEnumValue);
const Register rbp = ((Register)rbp_RegisterEnumValue);
const Register rsi = ((Register)rsi_RegisterEnumValue);
const Register rdi = ((Register)rdi_RegisterEnumValue);
const Register r8 = ((Register)r8_RegisterEnumValue);
const Register r9 = ((Register)r9_RegisterEnumValue);
const Register r10 = ((Register)r10_RegisterEnumValue);
const Register r11 = ((Register)r11_RegisterEnumValue);
const Register r12 = ((Register)r12_RegisterEnumValue);
const Register r13 = ((Register)r13_RegisterEnumValue);
const Register r14 = ((Register)r14_RegisterEnumValue);
const Register r15 = ((Register)r15_RegisterEnumValue);
When we need to use general-purpose registers, we can reference them through variables such as rax and rcx.
2、FloatRegisterImpl
In HotSpot VM, FloatRegisterImpl is used to represent floating-point registers. The definition of this class is as follows:
Source location: hotspot/src/cpu/x86/vm/register_x86.hpp
// FloatRegister is used as the shorthand name for a pointer to FloatRegisterImpl.
class FloatRegisterImpl;
typedef FloatRegisterImpl* FloatRegister;
// Represents a floating-point register.
class FloatRegisterImpl: public AbstractRegisterImpl {
 public:
  enum {
    number_of_registers = 8  // number of floating-point registers
  };
  // ... (remaining members omitted)
};
There are 8 floating-point registers, st0~st7, which are 8 80-bit registers.
It should be noted here that there is also a kind of register MMX. MMX is not a new register, but borrows the lower 64 bits of the 80-bit floating-point register. In other words, using the MMX instruction set will affect floating-point operations!
3、MMXRegisterImpl
MMX is a SIMD technology that can operate on multiple data items with one instruction. There are 8 64-bit registers, mm0-mm7 (borrowing the lower 64 bits of the 80-bit floating-point registers). Compared with other ordinary 64-bit registers, the difference is that MMX instructions can operate on two 32-bit values or four 16-bit values at the same time, which can be applied, for example, to the calculation of pixel colors in image processing.
The definition of the MMXRegisterImpl class is as follows:
// Only a forward declaration of MMXRegisterImpl is shown here (no class body),
// together with MMXRegister, the pointer alias used as the register handle.
class MMXRegisterImpl;
typedef MMXRegisterImpl* MMXRegister;
The definition of MMX register is as follows:
// Declares the MMX register constants: mnoreg with value -1, followed by
// mmx0..mmx7 numbered 0..7. Each invocation also introduces the matching
// <name>_MMXRegisterEnumValue enum constant.
CONSTANT_REGISTER_DECLARATION(MMXRegister, mnoreg , (-1));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx0 , ( 0));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx1 , ( 1));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx2 , ( 2));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx3 , ( 3));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx4 , ( 4));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx5 , ( 5));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx6 , ( 6));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx7 , ( 7));
The macro expansion is as follows:
// Expansion of the CONSTANT_REGISTER_DECLARATION invocations for the MMX
// registers: one extern declaration plus one enum constant per register. Each
// enum is terminated by the semicolon that follows the macro invocation.
extern const MMXRegister mnoreg;
enum { mnoreg_MMXRegisterEnumValue = ((-1)) };
extern const MMXRegister mmx0;
enum { mmx0_MMXRegisterEnumValue = (( 0)) };
extern const MMXRegister mmx1;
enum { mmx1_MMXRegisterEnumValue = (( 1)) };
extern const MMXRegister mmx2;
enum { mmx2_MMXRegisterEnumValue = (( 2)) };
extern const MMXRegister mmx3;
enum { mmx3_MMXRegisterEnumValue = (( 3)) };
extern const MMXRegister mmx4;
enum { mmx4_MMXRegisterEnumValue = (( 4)) };
extern const MMXRegister mmx5;
enum { mmx5_MMXRegisterEnumValue = (( 5)) };
extern const MMXRegister mmx6;
enum { mmx6_MMXRegisterEnumValue = (( 6)) };
extern const MMXRegister mmx7;
enum { mmx7_MMXRegisterEnumValue = (( 7)) };
There are 8 64-bit registers from mm0 to mm7 in CPUs after MMX Pentium and Pentium II. But in fact, the MMX register and the floating-point number register are shared, that is, the floating-point number register and the MMX register cannot be used at the same time.
The register variables defined in the cpu/x86/vm/register_definitions_x86.cpp file are as follows:
// Definitions of the MMX register constants. Each constant is the
// corresponding enum value cast to the MMXRegister pointer type.
const MMXRegister mnoreg = ((MMXRegister)mnoreg_MMXRegisterEnumValue);
const MMXRegister mmx0 = ((MMXRegister)mmx0_MMXRegisterEnumValue);
const MMXRegister mmx1 = ((MMXRegister)mmx1_MMXRegisterEnumValue);
const MMXRegister mmx2 = ((MMXRegister)mmx2_MMXRegisterEnumValue);
const MMXRegister mmx3 = ((MMXRegister)mmx3_MMXRegisterEnumValue);
const MMXRegister mmx4 = ((MMXRegister)mmx4_MMXRegisterEnumValue);
const MMXRegister mmx5 = ((MMXRegister)mmx5_MMXRegisterEnumValue);
const MMXRegister mmx6 = ((MMXRegister)mmx6_MMXRegisterEnumValue);
const MMXRegister mmx7 = ((MMXRegister)mmx7_MMXRegisterEnumValue);
When we need to use the MMX register, it is enough to reference through variables such as mmx0 and mmx1.
4. XMMRegisterImpl class
The XMM registers are the registers used by SSE instructions. Pentium III and later CPUs provide 8 128-bit wide XMM registers, xmm0 to xmm7; in 64-bit mode there are 16 of them, xmm0 to xmm15. There is also an mxcsr register, which is used to represent the operation status of SSE instructions. In HotSpot VM, an XMM register is represented by the XMMRegisterImpl class. The definition of this class is as follows:
Source location: hotspot/src/cpu/x86/vm/register_x86.hpp
// XMMRegister is used as the shorthand name for a pointer to XMMRegisterImpl.
class XMMRegisterImpl;
typedef XMMRegisterImpl* XMMRegister;
// Represents an XMM register.
class XMMRegisterImpl: public AbstractRegisterImpl {
 public:
  enum {
    number_of_registers = 16  // xmm0..xmm15 are declared for the 64-bit build
  };
  // ... (remaining members omitted)
};
The definition of XMM register is as follows:
// Declares the XMM register constants: xnoreg with value -1, followed by
// xmm0..xmm15 numbered 0..15. Each invocation also introduces the matching
// <name>_XMMRegisterEnumValue enum constant.
CONSTANT_REGISTER_DECLARATION(XMMRegister, xnoreg , (-1));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm0 , ( 0));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm1 , ( 1));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm2 , ( 2));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm3 , ( 3));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm4 , ( 4));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm5 , ( 5));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm6 , ( 6));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm7 , ( 7));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm8, (8));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm9, (9));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm10, (10));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm11, (11));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm12, (12));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm13, (13));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm14, (14));
CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm15, (15));
After macro expansion:
// Expansion of the CONSTANT_REGISTER_DECLARATION invocations for the XMM
// registers: one extern declaration plus one enum constant per register. Each
// enum is terminated by the semicolon that follows the macro invocation.
extern const XMMRegister xnoreg;
enum { xnoreg_XMMRegisterEnumValue = ((-1)) };
extern const XMMRegister xmm0;
enum { xmm0_XMMRegisterEnumValue = (( 0)) };
extern const XMMRegister xmm1;
enum { xmm1_XMMRegisterEnumValue = (( 1)) };
extern const XMMRegister xmm2;
enum { xmm2_XMMRegisterEnumValue = (( 2)) };
extern const XMMRegister xmm3;
enum { xmm3_XMMRegisterEnumValue = (( 3)) };
extern const XMMRegister xmm4;
enum { xmm4_XMMRegisterEnumValue = (( 4)) };
extern const XMMRegister xmm5;
enum { xmm5_XMMRegisterEnumValue = (( 5)) };
extern const XMMRegister xmm6;
enum { xmm6_XMMRegisterEnumValue = (( 6)) };
extern const XMMRegister xmm7;
enum { xmm7_XMMRegisterEnumValue = (( 7)) };
extern const XMMRegister xmm8;
enum { xmm8_XMMRegisterEnumValue = ((8)) };
extern const XMMRegister xmm9;
enum { xmm9_XMMRegisterEnumValue = ((9)) };
extern const XMMRegister xmm10;
enum { xmm10_XMMRegisterEnumValue = ((10)) };
extern const XMMRegister xmm11;
enum { xmm11_XMMRegisterEnumValue = ((11)) };
extern const XMMRegister xmm12;
enum { xmm12_XMMRegisterEnumValue = ((12)) };
extern const XMMRegister xmm13;
enum { xmm13_XMMRegisterEnumValue = ((13)) };
extern const XMMRegister xmm14;
enum { xmm14_XMMRegisterEnumValue = ((14)) };
extern const XMMRegister xmm15;
enum { xmm15_XMMRegisterEnumValue = ((15)) };
The register variables defined in the cpu/x86/vm/register_definitions_x86.cpp file are as follows:
// Definitions of the XMM register constants. Each constant is the
// corresponding enum value cast to the XMMRegister pointer type.
const XMMRegister xnoreg = ((XMMRegister)xnoreg_XMMRegisterEnumValue);
const XMMRegister xmm0 = ((XMMRegister)xmm0_XMMRegisterEnumValue);
const XMMRegister xmm1 = ((XMMRegister)xmm1_XMMRegisterEnumValue);
const XMMRegister xmm2 = ((XMMRegister)xmm2_XMMRegisterEnumValue);
const XMMRegister xmm3 = ((XMMRegister)xmm3_XMMRegisterEnumValue);
const XMMRegister xmm4 = ((XMMRegister)xmm4_XMMRegisterEnumValue);
const XMMRegister xmm5 = ((XMMRegister)xmm5_XMMRegisterEnumValue);
const XMMRegister xmm6 = ((XMMRegister)xmm6_XMMRegisterEnumValue);
const XMMRegister xmm7 = ((XMMRegister)xmm7_XMMRegisterEnumValue);
const XMMRegister xmm8 = ((XMMRegister)xmm8_XMMRegisterEnumValue);
const XMMRegister xmm9 = ((XMMRegister)xmm9_XMMRegisterEnumValue);
const XMMRegister xmm10 = ((XMMRegister)xmm10_XMMRegisterEnumValue);
const XMMRegister xmm11 = ((XMMRegister)xmm11_XMMRegisterEnumValue);
const XMMRegister xmm12 = ((XMMRegister)xmm12_XMMRegisterEnumValue);
const XMMRegister xmm13 = ((XMMRegister)xmm13_XMMRegisterEnumValue);
const XMMRegister xmm14 = ((XMMRegister)xmm14_XMMRegisterEnumValue);
const XMMRegister xmm15 = ((XMMRegister)xmm15_XMMRegisterEnumValue);
When we need to use XMM registers, we can directly reference them through variables such as xmm0 and xmm1.
Chapter 18-Commonly Used Instructions of the x86 Instruction Set
The x86 instruction set can be divided into the following 4 types:
- General instructions
- x87 FPU instruction, instruction for floating-point number arithmetic
- SIMD instruction is SSE instruction
- System instructions, special instructions used when writing OS kernel
Here are some general instructions. The instruction consists of a mnemonic (mnemonic) that identifies the type of the command and an operand (operand) as a parameter. For example, the move command:
instruction | Operand | describe |
movq | I/R/M,R/M | Copy 1 quad word (64 bits, 8 bytes) of data from the source operand to the destination operand |
movl | I/R/M,R/M | Copy 1 double word (32 bits, 4 bytes) of data from the source operand to the destination operand |
movw | I/R/M, R/M | Copy 1 word (16 bits, 2 bytes) of data from the source operand to the destination operand |
movb | I/R/M, R/M | Copy 1 byte (8 bits) of data from the source operand to the destination operand |
movl is a mnemonic. The mnemonic has a suffix. For example, the suffix l in movl indicates the data size of the object as the operand. l is the abbreviation of long, which represents the size of 32 bits. In addition, b, w, and q represent the size of 8 bits, 16 bits, and 64 bits, respectively.
If the operand of the instruction is more than one, separate each operand with a comma. Each operand will indicate whether it can be an immediate mode value (I), register (R) or memory address (M).
In addition, I should also remind you that in x86 assembly language, there can only be one operand using a memory location. For example, it is impossible to have mov M, M instructions.
Each operation in the general register can have a character suffix to indicate the size of the operand, as shown in the following table.
C statement | General register suffix | Size (bytes) |
char | b | 1 |
short | w | 2 |
(unsigned) int / long / char* | l | 4 |
float | s | 4 |
double | l | 8 |
long double | t | 10/12 |
Note: General-purpose registers use the suffix "l" to denote both 4-byte integers and 8-byte double-precision floating-point numbers. This does not cause ambiguity, because floating-point numbers use completely different instructions and registers.
Later on we introduce instructions such as call and push only by their base names. If you encounter callq, pushq and similar instructions when studying the assembly of HotSpot VM, don't be confused: the suffix simply indicates the size of the operand.
The following table shows the format and addressing mode of the operands.
Format | Operand value | name | Sample (general register = C language) |
$Imm | Imm | Immediate addressing | $1 = 1 |
Ea | R[Ea] | Register addressing | %eax = eax |
Imm | M[Imm] | Absolute addressing | 0x104 = *0x104 |
(Ea) | M[R[Ea]] | Indirect addressing | (%eax)= *eax |
Imm(Ea) | M[Imm+R[Ea]] | (Base address + offset) addressing | 4(%eax) = *(4+eax) |
(Ea,Eb) | M[R[Ea]+R[Eb]] | Indexed addressing | (%eax,%ebx) = *(eax+ebx) |
Imm(Ea,Eb) | M[Imm+R[Ea]+R[Eb]] | Indexed addressing | 9(%eax,%ebx) = *(9+eax+ebx) |
(,Ea,s) | M[R[Ea]*s] | Scaled indexed addressing | (,%eax,4) = *(eax*4) |
Imm(,Ea,s) | M[Imm+R[Ea]*s] | Scaled indexed addressing | 0xfc(,%eax,4) = *(0xfc+eax*4) |
(Ea,Eb,s) | M[R[Ea]+R[Eb]*s] | Scaled indexed addressing | (%eax,%ebx,4) = *(eax+ebx*4) |
Imm(Ea,Eb,s) | M[Imm+R[Ea]+R[Eb]*s] | Scaled indexed addressing | 8(%eax,%ebx,4) = *(8+eax+ebx*4) |
Note: M[xx] represents the value of the address xx in the memory, and R[xx] represents the value of the register xx. This representation method shows the register and memory in the form of a large array.
According to different compilers, there are two writing formats for assembly:
(1) Intel: Windows faction
(2) AT&T: Unix faction
Here is a brief introduction to the difference between the two.
Let's get to know the commonly used commands.
Below we give the AT&T assembly writing method, the two writing methods have the following differences.
1. Data transfer instructions
Transfer data from one place to another.
1.1 mov instruction
When we introduced the mov instruction, we introduced some more, because the mov instruction is the most frequently occurring instruction, and there are more suffixes in the mnemonic.
There are 3 forms of the mov instruction, as follows:
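mov # plain move instruction
movs # sign-extending move: sign-extends the source operand and transfers it to a wider destination register; the "s" stands for sign extension
movz # zero-extending move: zero-extends the source operand and transfers it to a wider destination register; the "z" stands for zero extension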
There is a letter after the mov instruction to indicate the size of the operand, in the following form:
movb # copies 1 byte
movw # copies 2 bytes
movl # copies 4 bytes
movq # copies 8 bytes
There is another instruction, as follows:
movabsq I,R
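Unlike movq, whose immediate operand is limited to a 32-bit value that is sign-extended to 64 bits, movabsq takes a full 64-bit immediate and stores it directly in a 64-bit register.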
The form of the movs instruction is as follows:
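movsbw # sign-extend 1 byte and copy it to 2 bytes
movsbl # sign-extend 1 byte and copy it to 4 bytes
movsbq # sign-extend 1 byte and copy it to 8 bytes
movswl # sign-extend 2 bytes and copy them to 4 bytes
movswq # sign-extend 2 bytes and copy them to 8 bytes
movslq # sign-extend 4 bytes and copy them to 8 bytes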
The form of the movz instruction is as follows:
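movzbw # zero-extend 1 byte and copy it to 2 bytes
movzbl # zero-extend 1 byte and copy it to 4 bytes
movzbq # zero-extend 1 byte and copy it to 8 bytes
movzwl # zero-extend 2 bytes and copy them to 4 bytes
movzwq # zero-extend 2 bytes and copy them to 8 bytes
movzlq # zero-extend 4 bytes and copy them to 8 bytes (listed for symmetry only: no such instruction is actually encoded, because a plain movl into a 32-bit register already clears the upper 32 bits of the 64-bit register)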
An example is as follows:
movl %ecx,%eax
movl (%ecx),%eax
The first instruction copies the value in the register ecx to the eax register; the second instruction uses the data in the ecx register as an address to access the memory, and loads the data in the memory into the eax register.
1.2 cmov instruction
The format of the cmov instruction is as follows:
cmovxx
Where xx represents one or more letters, which indicate the conditions that will trigger the transfer operation. The condition depends on the current value of the EFLAGS register.
Each of the eflags registers is shown in the figure below.
Among them, the bits in the eflags register related to the cmov instructions are CF (carry flag: the operation produced a carry or a borrow), OF (overflow flag: the signed result is too large or too small to be represented), PF (parity flag: the low byte of the result contains an even number of 1 bits), SF (sign flag: the result is negative) and ZF (zero flag: the result is zero).
The following table shows unsigned conditional transfer instructions.
Instruction pair | describe | eflags status |
cmova/cmovnbe | Greater than/not less than or equal to | (CF or ZF)=0 |
cmovae/cmovnb | Greater than or equal to/not less than | CF=0 |
cmovnc | No carry | CF=0 |
cmovb/cmovnae | Less than/not greater than or equal to | CF=1 |
cmovc | carry | CF=1 |
cmovbe/cmovna | Less than or equal to/not greater than | (CF or ZF)=1 |
cmove/cmovz | Equal to/zero | ZF=1 |
cmovne/cmovnz | Not equal to/not zero | ZF=0 |
cmovp/cmovpe | Parity/even parity | PF=1 |
cmovnp/cmovpo | Non-parity/odd parity | PF=0 |
Unsigned conditional transfer instructions rely on carry, zero, and parity flags to determine the difference between two operands.
The following table shows the signed conditional transfer instructions.
Instruction pair | describe | eflags status |
cmovge/cmovnl | Greater than or equal to/not less than | (SF XOR OF)=0 |
cmovl/cmovnge | Less than/not greater than or equal to | (SF XOR OF)=1 |
cmovle/cmovng | Less than or equal to/not greater than | ((SF XOR OF) or ZF)=1 |
cmovo | overflow | OF=1 |
cmovno | Not overflow | OF=0 |
cmovs | Signed (negative) | SF=1 |
cmovns | Not signed (non-negative) | SF=0 |
An example is as follows:
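// Load the value stored at `value` into the ecx register
movl value,%ecx
// Use cmp to compare the values in ecx and ebx; concretely, ebx is subtracted from ecx and eflags is set from the result
cmp %ebx,%ecx
// If the value in ecx is greater than the value in ebx (unsigned comparison), cmova copies ecx into ebx
cmova %ecx,%ebx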
Note that in AT&T syntax the source operand is written first and the destination operand is written last.
1.3 push and pop instructions
The form of the push command is shown in the table below.
instruction | Operand | describe |
push | I/R/M | The PUSH instruction first reduces the value of ESP, and then copies the source operand to the stack. If the operand is 16 bits, then ESP is reduced by 2, and if the operand is 32 bits, then ESP is reduced by 4 |
pusha | The instructions sequentially (AX, CX, DX, BX, SP, BP, SI, and DI) push 16-bit general-purpose registers onto the stack. | |
pushad | The instructions follow the order of EAX, ECX, EDX, EBX, ESP (value before PUSHAD), EBP, ESI, and EDI to push all 32-bit general-purpose registers onto the stack. |
The format of the pop instruction is shown in the following table.
instruction | Operand | describe |
pop | R/M | The instruction first copies the contents of the stack element pointed to by ESP to a 16-bit or 32-bit destination operand, and then increases the value of ESP. If the operand is 16-bit, ESP is increased by 2. If the operand is 32-bit, ESP is increased by 4 |
popa | The instruction pops the same register from the stack in reverse order | |
popad | The instruction pops the same register from the stack in reverse order |
1.4 xchg and xchgl
This instruction is used to exchange the values of its operands. The exchange instruction XCHG exchanges the contents of two registers, or of a register and a memory variable. The data types of the two operands must be the same; they can be a byte, a word, or a double word. The format is as follows:
xchg R/M,R/M
xchgl R/M,R/M
Both operands cannot be memory variables at the same time. The xchgl instruction is an old x86 instruction. Its function is to exchange the 4-byte values in two registers or memory addresses. Both values cannot be memory addresses, and it will not set the condition code.
1.5 lea
Lea calculates the actual address of the source operand and saves the result to the destination operand, which must be a general-purpose register. The format is as follows:
lea M,R
The lea (Load Effective Address) instruction loads the address into the register.
Examples are as follows:
movl 4(%ebx),%eax
leal 4(%ebx),%eax
The first instruction indicates that the result obtained by adding 4 to the value stored in the ebx register is accessed as the memory address, and the data stored in the memory address is loaded into the eax register.
The second instruction indicates that the result of adding 4 to the value stored in the ebx register is stored in the eax register as a memory address.
To give another example, as follows:
leaq a(b, c, d), %rax
Calculate the address a + b + c * d, and then load the resulting address into the register rax. Note that lea only performs this arithmetic; it does not access the memory at the computed address. It can therefore also be used as a cheap multiply-and-add instruction.
2. Arithmetic operation instructions
The following describes the basic arithmetic instructions that operate on signed and unsigned integers.
2.1 add and adc instructions
The format of the instruction is as follows:
add I/R/M,R/M
adc I/R/M,R/M
The instruction adds two operands, and the result is stored in the second operand.
For the first instruction, because both registers and memory have bit width restrictions, overflow may occur during addition operations. If the operation overflows, the carry flag (CF) in the flag register eflags will be set to 1.
For the second instruction, using the adc instruction plus the carry flag eflags.CF, you can add 64-bit data on a 32-bit machine.
Conventional arithmetic and logic operation instructions only need the original IA-32 instructions to be extended to 64 bits. For example, addq is the addition of quad words (64-bit values).
2.2 sub and sbb instructions
The format of the instruction is as follows:
sub I/R/M,R/M
sbb I/R/M,R/M
The instruction will subtract the first operand from the second operand, and the result will be stored in the second operand.
2.3 imul and mul instructions
The format of the instruction is as follows:
imul I/R/M,R
mul I/R/M,R
Multiply the first operand and the second operand, and write the result to the second operand. If the second operand is omitted, the eax register is used as the implicit operand, and the complete double-width result is stored in edx:eax. Note that mul exists only in this one-operand form, with a register or memory operand.
The first instruction performs signed multiplication, and the second instruction performs unsigned multiplication.
2.4 idiv and div instructions
The format of the instruction is as follows:
div R/M
idiv R/M
The first instruction performs unsigned division, and the second instruction performs signed division. The dividend is formed by splicing the edx register and the eax register, the divisor is specified by the first operand of the instruction, the calculated quotient is stored in the eax register, and the remainder is stored in the edx register. As shown below.
edx:eax
------------ = eax (quotient) ... edx (remainder)
register
The bit width of the dividend, quotient, and divisor data is different during operation. The following table shows the registers used by the idiv instruction and the div instruction.
Data bit width | Dividend | Divisor | Quotient | Remainder |
8-bit | ax | The first operand of the instruction | al | ah |
16 bits | dx:ax | The first operand of the instruction | ax | dx |
32 bit | edx:eax | The first operand of the instruction | eax | edx |
The idiv instruction and the div instruction usually divide a dividend whose bit width is twice that of the divisor. For example, on an x86-32 machine the width of a general-purpose register is 32 bits, and one register cannot hold 64 bits of data, so the edx register stores the high 32 bits of the dividend and the eax register stores the low 32 bits of the dividend.
Therefore, when performing division operations, the 32-bit data set in the eax register must be extended to 64 bits including the edx register, that is, signed for sign extension, and unsigned numbers for zero extension.
You can use cltd (AT&T style writing) or cdq (Intel style writing) for sign extension of edx. The format of the instruction is as follows:
cltd // sign-extend the data in the eax register into edx:eax
cltd signs-extends the data in the eax register to edx:eax.
2.5 incl and decl instructions
The format of the instruction is as follows:
inc R/M
dec R/M
Add 1 or subtract 1 to the data stored in the register or memory location specified by the first operand of the instruction.
2.6 negl instruction
The format of the instruction is as follows:
neg R/M
The neg instruction reverses the sign of the first operand.
3. Bit operation instructions
3.1 andl, orl and xorl instructions
The format of the instruction is as follows:
and I/R/M,R/M
or I/R/M,R/M
xor I/R/M,R/M
The and instruction performs a bitwise AND operation between the second operand and the first operand, and writes the result into the second operand;
The or instruction performs a bitwise OR operation between the second operand and the first operand, and writes the result into the second operand;
The xor instruction performs a bitwise XOR operation between the second operand and the first operand, and writes the result to the second operand;
3.2 not command
The format of the instruction is as follows:
not R/M
Invert the operand in bits and write the result to the operand.
3.3 sal, sar, shr instructions
The format of the instruction is as follows:
sal I/%cl,R/M # arithmetic left shift
sar I/%cl,R/M # arithmetic right shift
shl I/%cl,R/M # logical left shift
shr I/%cl,R/M # logical right shift
The sal instruction shifts the second operand to the left by the number of bits specified by the first operand, and writes the result into the second operand. After shifting, the vacated low bits are filled with 0. The first operand of the instruction can only be an 8-bit immediate value or the cl register, and only the lower 5 bits of the count are used (the lower 6 bits for a 64-bit operand): the processor masks off the higher bits, so a count of 32 or more on a 32-bit operand is taken modulo 32 rather than shifting every bit out of the register.
The sar instruction shifts the second operand to the right according to the number of bits specified by the first operand, and writes the result into the second operand. The space after the shift is sign extended. Like the sal instruction, the first operand of the sar instruction must also be an 8-bit immediate or cl register, and only the lower 5 bits of data are meaningful.
The shl instruction and the sal instruction behave exactly the same; there is no need to distinguish between them.
The shr command shifts the second operand to the right according to the number of bits specified by the first operand, and writes the result to the second operand. The space after the shift is zero-extended. Like the sal instruction, the first operand of the shr instruction must also be an 8-bit immediate value or cl register, and only the lower 5 bits of data are meaningful.
4. Process control instructions
4.1 jmp instruction
The format of the instruction is as follows:
jmp I/R
The jmp instruction unconditionally jumps the program to the destination address specified by the operand. The jmp instruction can be regarded as an instruction that sets the instruction pointer (the eip register). The destination can also be written as an asterisk followed by a register, which is an indirect jump. For example:
jmp *%eax
Jump the program to the address contained in eax.
4.2 Conditional jump instructions
The format of the conditional jump instruction is as follows:
Jcc destination address
Where cc refers to the jump condition, if it is true, the program jumps to the destination address; otherwise, the next instruction is executed. The related conditional jump instructions are shown in the following table.
instruction | Jump condition | describe | instruction | Jump condition | describe |
jz | ZF=1 | Jump when 0 | jbe | CF=1 or ZF=1 | Jump when less than or equal to |
jnz | ZF=0 | Jump when it is not 0 | jnbe | CF=0 and ZF=0 | Jump when not less than or equal to |
je | ZF=1 | Jump when equal | jg | ZF=0 and SF=OF | Jump when greater than |
jne | ZF=0 | Jump when not equal | jng | ZF=1 or SF!=OF | Jump if not greater than |
ja | CF=0 and ZF=0 | Jump when greater than | jge | SF=OF | Jump when greater than or equal to |
jna | CF=1 or ZF=1 | Jump if not greater than | jnge | SF!=OF | Jump when not greater than or equal to |
jae | CF=0 | Jump when greater than or equal to | jl | SF!=OF | Jump when less than |
jnae | CF=1 | Jump when not greater than or equal to | jnl | SF=OF | Jump when not less than |
jb | CF=1 | Jump when less than | jle | ZF=1 or SF!=OF | Jump when less than or equal to |
jnb | CF=0 | Jump when not less than | jnle | ZF=0 and SF=OF | Jump when not less than or equal to |
4.3 cmp instruction
The format of the cmp command is as follows:
cmp I/R/M,R/M
The cmp instruction compares the difference between the second operand and the first operand, and sets the flag bit in the flag register eflags according to the result. The cmp instruction is similar to the sub instruction, but the cmp instruction does not change the value of the operand.
The relationship between the operand and the flag bit set is shown in the table.
The relationship between operands | CF | ZF | OF |
The first operand is less than the second operand | 0 | 0 | SF |
The first operand is equal to the second operand | 0 | 1 | 0 |
The first operand is greater than the second operand | 1 | 0 | not SF |
4.4 test command
The format of the instruction is as follows:
test I/R/M,R/M
The instruction computes the logical AND of the first operand and the second operand, and sets the flag bits in the flag register eflags according to the result. The test instruction is essentially the same as the and instruction, except that the test instruction does not change the value of the operand.
After the test instruction is executed, CF and OF are always cleared, and ZF and SF are set according to the result of the operation. When the result of the operation is zero, ZF is set to 1, and SF has the same value as the highest bit of the result.
An example is as follows:
The test command can check several bits at the same time. Suppose you want to know whether bit 0 and bit 3 of the AL register are set to 1, you can use the following command:
test al,00001001b # the mask is 0000 1001; tests whether bit 0 and bit 3 are 1
From the following data set example, it can be inferred that the zero flag bit is set only when all test bits are cleared:
0 0 1 0 0 1 0 1 <- input value
0 0 0 0 1 0 0 1 <- test value
0 0 0 0 0 0 0 1 <- result: ZF=0
0 0 1 0 0 1 0 0 <- input value
0 0 0 0 1 0 0 1 <- test value
0 0 0 0 0 0 0 0 <- result: ZF=1
The test instruction always clears the overflow and carry flags, and the method of modifying the sign flag, zero flag and parity flag is the same as the AND instruction.
4.5 setcc instruction
Set the target operand to 0 or 1 according to the status flags (CF, SF, OF, ZF and PF) in eflags. The target operand here points to a byte register (that is, an 8-bit register, such as AL, BL, CL) or a byte in memory. The status code suffix (cc) indicates the condition to be tested.
The format of the instruction to get the flag bit is as follows:
setcc R/M
The instruction sets the operand to 0 or 1 according to the value of the flag register eflags.
The cc in setcc is similar to the cc in Jcc, please refer to the table.
4.6 call instruction
The format of the instruction is as follows:
call I/R/M
The call instruction will call the function specified by the operand. The call instruction pushes the address of the next instruction to the stack, and then jumps to the address specified by the operand, so that the function can return from the sub-function by jumping to the address on the stack. Equivalent to
push %eip
jmp addr
First push the address of the next instruction onto the stack, and then jump to the target address addr.
4.7 ret instruction
The format of the instruction is as follows:
ret
The ret instruction is used to return from a sub-function. In Linux of the X86 architecture, the return value of the function is set to the eax register and returned. It is equivalent to the following command:
popl %eip
Pop the address of the instruction following the call instruction (which the call instruction pushed onto the stack) and load it into the instruction pointer. In this way, the program correctly returns to the place where the sub-function was called.
Physically speaking, the CALL instruction pushes its return address onto the stack, and then copies the address of the called procedure to the instruction pointer register. When the process is ready to return, its RET instruction pops the return address from the stack back to the instruction pointer register.
4.8 enter command
The enter instruction establishes the stack frame required by the function parameters and local variables for the function by initializing the ebp and esp registers. Equivalent to
push %rbp
mov %rsp,%rbp
4.9 leave instruction
leave removes the stack frame created with the enter instruction by restoring the ebp and esp registers. Equivalent to
mov %rbp, %rsp
pop %rbp
Set the stack pointer to the frame pointer, and then pop the saved original frame pointer into %rbp
4.10 int instruction
The format of the instruction is as follows:
int I
Cause an interrupt for the given number. This is usually used for system calls and other kernel interfaces.
5. Flag operations
The flags of the eflags register are shown in the figure below.
Some instructions for operating the eflags register flags are shown in the following table.
instruction | Operand | describe |
pushfd | R | The PUSHFD instruction pushes the contents of the 32-bit EFLAGS register onto the stack |
popfd | R | The POPFD instruction pops the contents of the top cell of the stack to the EFLAGS register |
cld | | Set eflags.df to 0 |
Chapter 19-Load and Store Instructions (1)
The TemplateInterpreterGenerator::generate_all() function generates many routines (that is, machine instruction fragments, called Stub in English), including calling the set_entry_points_for_all_bytes() function to generate routines corresponding to each bytecode.
Eventually, it will call the TemplateInterpreterGenerator::generate_and_dispatch() function. The call stack is as follows:
TemplateTable::generate() templateTable_x86_64.cpp
TemplateInterpreterGenerator::generate_and_dispatch() templateInterpreter.cpp
TemplateInterpreterGenerator::set_vtos_entry_points() templateInterpreter_x86_64.cpp
TemplateInterpreterGenerator::set_short_entry_points() templateInterpreter.cpp
TemplateInterpreterGenerator::set_entry_points() templateInterpreter.cpp
TemplateInterpreterGenerator::set_entry_points_for_all_bytes() templateInterpreter.cpp
TemplateInterpreterGenerator::generate_all() templateInterpreter.cpp
InterpreterGenerator::InterpreterGenerator() templateInterpreter_x86_64.cpp
TemplateInterpreter::initialize() templateInterpreter.cpp
interpreter_init() interpreter.cpp
init_globals() init.cpp
Many functions on the call stack have been introduced before. Each bytecode specifies a generator function, which is saved in the _gen attribute of the Template and is called in the TemplateTable::generate() function. _gen generates the machine instruction fragment corresponding to each bytecode, so it is very important.
First look at the very simple nop bytecode instruction. The template attributes of this instruction are as follows:
// Java spec bytecodes ubcp|disp|clvm|iswd in out generator argument
def(Bytecodes::_nop , ____|____|____|____, vtos, vtos, nop , _ );
The generator function of the nop bytecode instruction does not generate any machine instructions, so the assembly code corresponding to the nop bytecode instruction contains only the stack-top cache logic. The assembly code generated by calling the set_vtos_entry_points() function is as follows:
// aep
0x00007fffe1027c00: push %rax
0x00007fffe1027c01: jmpq 0x00007fffe1027c30
// fep
0x00007fffe1027c06: sub $0x8,%rsp
0x00007fffe1027c0a: vmovss %xmm0,(%rsp)
0x00007fffe1027c0f: jmpq 0x00007fffe1027c30
// dep
0x00007fffe1027c14: sub $0x10,%rsp
0x00007fffe1027c18: vmovsd %xmm0,(%rsp)
0x00007fffe1027c1d: jmpq 0x00007fffe1027c30
// lep
0x00007fffe1027c22: sub $0x10,%rsp
0x00007fffe1027c26: mov %rax,(%rsp)
0x00007fffe1027c2a: jmpq 0x00007fffe1027c30
// bep cep sep iep
0x00007fffe1027c2f: push %rax
// vep
// Next comes the instruction-fetch logic, which starts at address 0x00007fffe1027c30
It can be seen that since tos_in is vtos, for the aep, bep, cep, sep and iep entries the push instruction is used directly to push the stack-top cache value stored in %rax onto the expression stack. For fep, dep and lep, space of the corresponding size is reserved on the stack and the value in the register is then stored at the top of the expression stack, which has the same effect as the push instruction.
In the set_vtos_entry_points() function, the generate_and_dispatch() function is called to generate the machine instruction fragment of the nop instruction and the machine instruction fragment of the next bytecode instruction. nop will not generate any machine instructions, and the fragments for fetching are as follows:
// movzbl moves a zero-extended byte into a doubleword; this instruction is at address 0x00007fffe1027c30
0x00007fffe1027c30: movzbl 0x1(%r13),%ebx
0x00007fffe1027c35: inc %r13
0x00007fffe1027c38: movabs $0x7ffff73ba4a0,%r10
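// The source operand of movabs can only be an immediate value or a label (which is essentially an immediate value); the destination operand is a register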
0x00007fffe1027c42: jmpq *(%r10,%rbx,8)
%r13 points to the address of the bytecode instruction currently being executed, so %r13+1 skips the current nop instruction and is the address of the next bytecode instruction. The movzbl instruction loads the opcode at that address into %ebx, and the inc instruction then advances %r13 to the next bytecode instruction.
jmpq then jumps to the address stored in memory at %r10+%rbx*8. This jump address has been introduced in detail before, so I won't introduce it here.
We have explained the nop instruction and reviewed the stack-top cache logic and the instruction-fetch logic. Every bytecode instruction has this stack-top cache logic and instruction-fetch logic, so I won't introduce these two pieces of logic again when introducing the bytecode instructions later.
The bytecode instructions for loading and storing operations are shown in the following table.
Bytecode | Mnemonic | Command meaning |
0x00 | nop | do nothing |
0x01 | aconst_null | Push null to the top of the stack |
0x02 | iconst_m1 | Push int -1 to the top of the stack |
0x03 | iconst_0 | Push the int type 0 to the top of the stack |
0x04 | iconst_1 | Push the int type 1 to the top of the stack |
0x05 | iconst_2 | Push the int type 2 to the top of the stack |
0x06 | iconst_3 | Push the int type 3 to the top of the stack |
0x07 | iconst_4 | Push the int type 4 to the top of the stack |
0x08 | iconst_5 | Push the int type 5 to the top of the stack |
0x09 | lconst_0 | Push long 0 to the top of the stack |
0x0a | lconst_1 | Push long type 1 to the top of the stack |
0x0b | fconst_0 | Push float 0 to the top of the stack |
0x0c | fconst_1 | Push float type 1 to the top of the stack |
0x0d | fconst_2 | Push float 2 to the top of the stack |
0x0e | dconst_0 | Push double type 0 to the top of the stack |
0x0f | dconst_1 | Push double type 1 to the top of the stack |
0x10 | bipush | Push the single-byte constant value (-128~127) to the top of the stack |
0x11 | sipush | Push a short integer constant value (-32768~32767) to the top of the stack |
0x12 | ldc | Push int, float or String constant values from the constant pool to the top of the stack |
0x13 | ldc_w | Push the constant value of int, float or String type from the constant pool to the top of the stack (wide index) |
0x14 | ldc2_w | Push the long or double constant value from the constant pool to the top of the stack (wide index) |
0x15 | iload | Push the specified int type local variable to the top of the stack |
0x16 | lload | Push the specified long type local variable to the top of the stack |
0x17 | fload | Push the specified float type local variable to the top of the stack |
0x18 | dload | Push the specified double type local variable to the top of the stack |
0x19 | aload | Push the specified reference type local variable to the top of the stack |
0x1a | iload_0 | Push the first int type local variable to the top of the stack |
0x1b | iload_1 | Push the second int type local variable to the top of the stack |
0x1c | iload_2 | Push the third int type local variable to the top of the stack |
0x1d | iload_3 | Push the fourth int type local variable to the top of the stack |
0x1e | lload_0 | Push the first long local variable to the top of the stack |
0x1f | lload_1 | Push the second long local variable to the top of the stack |
0x20 | lload_2 | Push the third long local variable to the top of the stack |
0x21 | lload_3 | Push the fourth long local variable to the top of the stack |
0x22 | fload_0 | Push the first float type local variable to the top of the stack |
0x23 | fload_1 | Push the second float type local variable to the top of the stack |
0x24 | fload_2 | Push the third float type local variable to the top of the stack |
0x25 | fload_3 | Push the fourth float type local variable to the top of the stack |
0x26 | dload_0 | Push the first double type local variable to the top of the stack |
0x27 | dload_1 | Push the second double local variable to the top of the stack |
0x28 | dload_2 | Push the third double local variable to the top of the stack |
0x29 | dload_3 | Push the fourth double type local variable to the top of the stack |
0x2a | aload_0 | Push the first reference type local variable to the top of the stack |
0x2b | aload_1 | Push the second reference type local variable to the top of the stack |
0x2c | aload_2 | Push the third reference type local variable to the top of the stack |
0x2d | aload_3 | Push the fourth reference type local variable to the top of the stack |
0x2e | iaload | Push the value of the specified index of the int type array to the top of the stack |
0x2f | laload | Push the value of the specified index of the long array to the top of the stack |
0x30 | faload | Push the value of the specified index of the float type array to the top of the stack |
0x31 | daload | Push the value of the specified index of the double array to the top of the stack |
0x32 | aaload | Push the value of the specified index of the reference array to the top of the stack |
0x33 | baload | Push the value of the specified index of the boolean or byte array to the top of the stack |
0x34 | caload | Push the value of the specified index of the char array to the top of the stack |
0x35 | saload | Push the value of the specified index of the short array to the top of the stack |
0x36 | istore | Store the int type value on the top of the stack into the specified local variable |
0x37 | lstore | Store the long value on the top of the stack into the specified local variable |
0x38 | fstore | Store the float value on the top of the stack into the specified local variable |
0x39 | dstore | Store the double value on the top of the stack into the specified local variable |
0x3a | astore | Store the top-referenced value of the stack into the specified local variable |
0x3b | istore_0 | Store the int value on the top of the stack into the first local variable |
0x3c | istore_1 | Store the int value of the top of the stack into the second local variable |
0x3d | istore_2 | Store the int value on the top of the stack into the third local variable |
0x3e | istore_3 | Store the int value of the top of the stack into the fourth local variable |
0x3f | lstore_0 | Store the long value on the top of the stack into the first local variable |
0x40 | lstore_1 | Store the long value on the top of the stack into the second local variable |
0x41 | lstore_2 | Store the long value on the top of the stack into the third local variable |
0x42 | lstore_3 | Store the long value on the top of the stack into the fourth local variable |
0x43 | fstore_0 | Store the float value on the top of the stack into the first local variable |
0x44 | fstore_1 | Store the float value on the top of the stack into the second local variable |
0x45 | fstore_2 | Store the float value on the top of the stack into the third local variable |
0x46 | fstore_3 | Store the float value on the top of the stack into the fourth local variable |
0x47 | dstore_0 | Store the double value on the top of the stack into the first local variable |
0x48 | dstore_1 | Store the double value on the top of the stack into the second local variable |
0x49 | dstore_2 | Store the double value on the top of the stack into the third local variable |
0x4a | dstore_3 | Store the double value on the top of the stack into the fourth local variable |
0x4b | astore_0 | Store the top-referenced value of the stack into the first local variable |
0x4c | astore_1 | Store the top-referenced value of the stack into the second local variable |
0x4d | astore_2 | Store the top-referenced value of the stack in the third local variable |
0x4e | astore_3 | Store the top-referenced value of the stack into the fourth local variable |
0x4f | iastore | Store the top int value of the stack in the specified index position of the specified array |
0x50 | lastore | Store the long value on the top of the stack into the specified index position of the specified array |
0x51 | fastore | Store the float value on the top of the stack into the specified index position of the specified array |
0x52 | dastore | Store the double value on the top of the stack into the specified index position of the specified array |
0x53 | aastore | Store the top reference value of the stack in the specified index position of the specified array |
0x54 | bastore | Store the boolean or byte value at the top of the stack into the specified index position of the specified array |
0x55 | castore | Store the top char value of the stack into the specified index position of the specified array |
0x56 | sastore | Store the short value on the top of the stack into the specified index position of the specified array |
0xc4 | wide | The instruction to expand the access index of the local variable table |
We will not examine the machine instruction fragment of every bytecode instruction (in practice, the machine instruction fragment is disassembled and its execution logic is understood by reading the assembly). The logic of some instructions is similar, so here we only choose a few typical ones to introduce.
1. Push type instructions
(1) aconst_null instruction
aconst_null means sending null to the top of the stack. The template is defined as follows:
def(Bytecodes::_aconst_null , ____|____|____|____, vtos, atos, aconst_null , _ );
The assembly code of the instruction is as follows:
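// The xor instruction performs a logical exclusive-OR between the corresponding bits of the two operands and stores the result in the destination operand
// When the first operand and the second operand are the same, the xor operation is equivalent to clearing the operand to zero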
xor %eax,%eax
Since tos_out is atos, the result at the top of the stack is cached in the %eax register, and only the xor operation is performed on the %eax register.
(2) iconst_m1 instruction
iconst_m1 means pushing -1 into the stack. The template is defined as follows:
def(Bytecodes::_iconst_m1 , ____|____|____|____, vtos, itos, iconst , -1 );
After the generated machine instructions are disassembled, the resulting assembly code is as follows:
mov $0xffffffff,%eax
Other bytecode instructions similar to the iconst_m1 bytecode instruction, such as iconst_0, iconst_1, etc., are defined as follows:
def(Bytecodes::_iconst_m1 , ____|____|____|____, vtos, itos, iconst , -1 );
def(Bytecodes::_iconst_0 , ____|____|____|____, vtos, itos, iconst , 0 );
def(Bytecodes::_iconst_1 , ____|____|____|____, vtos, itos, iconst , 1 );
def(Bytecodes::_iconst_2 , ____|____|____|____, vtos, itos, iconst , 2 );
def(Bytecodes::_iconst_3 , ____|____|____|____, vtos, itos, iconst , 3 );
def(Bytecodes::_iconst_4 , ____|____|____|____, vtos, itos, iconst , 4 );
def(Bytecodes::_iconst_5 , ____|____|____|____, vtos, itos, iconst , 5 );
As you can see, the generating functions are all the same TemplateTable::iconst() function.
The assembly code of iconst_0 is as follows:
xor %eax,%eax
The assembly code corresponding to the bytecode instructions of iconst_@ (where @ is 1, 2, 3, 4 or 5) is as follows:
// aep
0x00007fffe10150a0: push %rax
0x00007fffe10150a1: jmpq 0x00007fffe10150d0
// fep
0x00007fffe10150a6: sub $0x8,%rsp
0x00007fffe10150aa: vmovss %xmm0,(%rsp)
0x00007fffe10150af: jmpq 0x00007fffe10150d0
// dep
0x00007fffe10150b4: sub $0x10,%rsp
0x00007fffe10150b8: vmovsd %xmm0,(%rsp)
0x00007fffe10150bd: jmpq 0x00007fffe10150d0
// lep
0x00007fffe10150c2: sub $0x10,%rsp
0x00007fffe10150c6: mov %rax,(%rsp)
0x00007fffe10150ca: jmpq 0x00007fffe10150d0
// bep/cep/sep/iep
0x00007fffe10150cf: push %rax
// vep
0x00007fffe10150d0: mov $0x@,%eax // @ stands for 1, 2, 3, 4 or 5
If you have read the articles I wrote before, you should be able to understand the above assembly code, so I will not introduce it again here.
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。