现代cpu与奔腾4级别的差距?

用汇编语言实现整数乘法,并让它循环执行很多次(0FFFFFFFFh 次),以此测量程序花费的时间.
有两种实现方法:一种用移位指令 shl(配合 add),另一种用标准乘法指令 mul.

实验结果如下:

                        shl        mul
4GHz的奔腾4机器         6078ms    20718ms
3.3GHz的i5-4590        7300ms     8400ms

可见 shl 版本的耗时主要取决于 CPU 频率,而在现在的 CPU 上, mul 版本的性能似乎已经基本能和 shl 版本媲美了.

我想问:

  1. 在这个程序的 shl 版本上,现在的 CPU 为什么好像体现不出什么优势?
  2. 按传统说法, mul 指令要比普通指令慢得多,为何这里 i5 上 mul 版本的性能与 shl 版本基本无差异?

只要给个概念性的回答即可


代码如下,供参考:

; Comparing Multiplications         (CompareMult.asm)

; This program compares the execution times of two approaches to
; integer multiplication: Binary shifting versus the MUL instruction.
;
; Each approach is run LOOP_COUNT times inside its own procedure, and
; main prints the elapsed time of each run on its own line.
; GetMseconds, WriteDec, Crlf and exit are not defined in this file;
; presumably they come from Irvine32.inc -- verify against the library.

INCLUDE Irvine32.inc

; Iteration count loaded into ECX by both timed procedures.
LOOP_COUNT = 0FFFFFFFFh

.data
; Multiplicand: loaded into EAX before each timed call.
intval DWORD 5
; Holds the GetMseconds reading taken at the start of the current run.
startTime DWORD ?
.code
main PROC
;
; Timing harness: runs mult_by_shifting and then mult_by_MUL, and after
; each run prints (stop reading - start reading) followed by a newline.
; The reading comes back from GetMseconds in EAX (presumably in
; milliseconds, going by the name -- confirm against the library).
; Both runs use the same input, intval, passed in EAX.

; First approach:

    call    GetMseconds ; get start time
    mov startTime,eax

    mov eax,intval  ; multiply now
    call    mult_by_shifting

    call    GetMseconds ; get stop time
    sub eax,startTime       ; EAX = stop - start
    call    WriteDec        ; display elapsed time
    call    Crlf

; Second approach:

    call    GetMseconds ; get start time
    mov startTime,eax       ; reuse the same variable for the second run

    mov eax,intval
    call    mult_by_MUL

    call    GetMseconds ; get stop time
    sub eax,startTime       ; EAX = stop - start
    call    WriteDec        ; display elapsed time
    call    Crlf

    exit
main ENDP


;---------------------------------
mult_by_shifting PROC
;
; Benchmark body: computes EAX * 36 as (EAX shl 5) + (EAX shl 2),
;    i.e. EAX*32 + EAX*4, LOOP_COUNT times.
;    The product is discarded every iteration: POP restores the
;    original EAX, so each pass starts from the same input.
; Receives: EAX = multiplicand
; Returns:  EAX unchanged
; Clobbers: EBX (left holding EAX*4), ECX (0 on exit), flags
;
; NOTE(review): every iteration also pays for PUSH, POP and LOOP, so
;    the measured time is not the cost of SHL/ADD alone. The legacy
;    LOOP instruction is reported to be slow on Intel cores, so this
;    shared overhead may dominate the result -- confirm by measurement.
;---------------------------------

    mov ecx,LOOP_COUNT      ; ECX = iterations remaining

L1: push    eax         ; save original EAX
    mov ebx,eax         ; second copy of the multiplicand
    shl eax,5           ; EAX = n * 32
    shl ebx,2           ; EBX = n * 4
    add eax,ebx         ; EAX = n * 36 (result is not kept)
    pop eax         ; restore EAX, discarding the product
    loop    L1          ; decrement ECX, repeat until it reaches 0

    ret
mult_by_shifting ENDP


;---------------------------------
mult_by_MUL PROC
;
; Benchmark body: computes EAX * 36 with the MUL instruction,
;    LOOP_COUNT times.
;    The product is discarded every iteration: POP restores the
;    original EAX, so each pass starts from the same input.
; Receives: EAX = multiplicand
; Returns:  EAX unchanged
; Clobbers: EBX (left holding 36), ECX (0 on exit),
;           EDX (high 32 bits of the last product), flags
;
; NOTE(review): every iteration also pays for PUSH, POP and LOOP, so
;    the measured time is not the cost of MUL alone. The legacy LOOP
;    instruction is reported to be slow on Intel cores, so this shared
;    overhead may dominate the result -- confirm by measurement.
;---------------------------------

    mov ecx,LOOP_COUNT      ; ECX = iterations remaining

L1: push    eax         ; save original EAX
    mov ebx,36          ; multiplier (reloaded every pass)
    mul ebx             ; EDX:EAX = EAX * 36 (result is not kept)
    pop eax         ; restore EAX, discarding the product
    loop    L1          ; decrement ECX, repeat until it reaches 0

    ret
mult_by_MUL ENDP

END main
阅读 3.3k
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进