here it is my attempts...
a simple asm conversion... and then some 2... or 3 optimizations
i think this code in especial can be better optimized by aligning...
but i didnt tried that except from one jump that was badly misaligned :P (got 7 cycles there!)
Code: Select all
#include "Counter.bas"
Function gcd(u As Uinteger, v As Uinteger) As Uinteger
Dim As Uinteger sft=0, tmp=0
If (u*v = 0) Then Return 0
While (((u Or v) And 1) = 0)
sft += 1
u Shr= 1
v Shr= 1
Wend
While ((u And 1) = 0)
u Shr= 1
Wend
Do
While ((v And 1) = 0)
v Shr= 1
Wend
If (u < v) Then
v -= u
Else
tmp = u - v
u = v
v = tmp
End If
v Shr= 1
Loop While (v <> 0)
Return (u Shl sft)
End Function
' ******************************************************
' ******************************************************
' ******************************************************
Function gcd_asm(u As Uinteger, v As Uinteger) As Uinteger
asm
mov eax,[U] ' \
or eax,eax ' |
jz _GCD_END_ ' | if (u*v=0) then return
mov ebx,[V] ' |
or ebx,ebx ' |
jz _GCD_END_ ' /
xor ecx,ecx ' Dim As Uinteger sft=0
_WHILE1_BEGIN_: ' \
mov edx,eax ' |
or edx,ebx ' | While (((u Or v) And 1) = 0)
test edx,1 ' |
jnz _WHILE1_END_ ' /
inc ecx ' sft += 1
shr eax,1 ' u Shr= 1
shr ebx,1 ' v Shr= 1
jmp _WHILE1_BEGIN_ ' \
_WHILE1_END_: ' / Wend
_WHILE2_BEGIN_: ' \
test eax,1 ' | While ((u And 1) = 0)
jnz _WHILE2_END_ ' /
shr eax,1 ' u Shr= 1
jmp _WHILE2_BEGIN_ ' \
_WHILE2_END_: ' /
_DO1_BEGIN: ' do
_WHILE3_BEGIN_: ' \ while ((v And 1) = 0)
test ebx,1 ' |
jnz _WHILE3_END_ ' /
shr ebx,1 ' v Shr= 1
jmp _WHILE3_BEGIN_ ' \
_WHILE3_END_: ' / wend
cmp eax,ebx ' \ If (u < v) Then
jae _IF_ELSE1_ ' /
sub ebx,eax ' \
jmp _IF_END1_ ' / v -= u
_IF_ELSE1_: ' Else
mov edx,eax ' \
sub edx,ebx ' / tmp = u - v
mov eax,ebx ' u = v
mov ebx,edx ' v = tmp
_IF_END1_: ' end if
shr ebx,1 ' v Shr= 1
jnz _ODO1_BEGIN ' Loop While (v <> 0)
shl eax,cl ' \
mov [FUNCTION],eax ' | Return (u Shl sft)
_GCD_END_: ' /
end asm
End Function
' **********************************************************************
' **********************************************************************
' **********************************************************************
Function gcd_asm_optimized(u As Uinteger, v As Uinteger) As Uinteger
asm
mov eax,[U] ' \
or eax,eax ' |
jz _OGCD_END_ ' | if (u*v=0) then return
mov ebx,[V] ' |
or ebx,ebx ' |
jz _OGCD_END_ ' /
xor ecx,ecx ' Dim As Uinteger sft=0
mov edx,eax
or edx,ebx
_OWHILE1_BEGIN_: ' \
test edx,1 ' |
jnz _OWHILE1_ENDZ_ ' | While (((u Or v) And 1) = 0)
test edx,2 ' |
jnz _OWHILE1_END_ ' |
add ecx,2 ' sft += 1
shr edx,2
shr eax,2 ' u Shr= 1
shr ebx,2 ' v Shr= 1
jmp _OWHILE1_BEGIN_ ' \
_OWHILE1_END_: ' / Wend
shr eax,1
shr ebx,1
inc ecx
_OWHILE1_ENDZ_:
_OWHILE2_BEGIN_: ' \
test eax,1 ' | While ((u And 1) = 0)
jnz _OWHILE2_ENDZ_ ' /
test eax,2 ' | While ((u And 1) = 0)
jnz _OWHILE2_END_ ' /
shr eax,1 ' u Shr= 1
jmp _OWHILE2_BEGIN_ ' \
nop
nop
nop
nop
nop
_OWHILE2_END_: ' /
shr eax,1
_OWHILE2_ENDZ_:
_ODO1_BEGIN: ' do
_OWHILE3_BEGIN_: ' \ while ((v And 1) = 0)
test ebx,1 ' |
jnz _OWHILE3_ENDZ_ ' /
test ebx,2 ' |
jnz _OWHILE3_END_ ' /
shr ebx,2 ' v Shr= 1
jmp _OWHILE3_BEGIN_ ' \
_OWHILE3_END_: ' / wend
shr ebx,1
_OWHILE3_ENDZ_:
mov edx,ebx ' \ If (u < v) Then
sub ebx,eax ' \
jns _OIF_END1_ ' / v -= u
'_OIF_ELSE1_: ' Else
mov ebx,eax ' \
sub ebx,edx ' / tmp<v> = u - v
mov eax,edx ' u = v
_OIF_END1_: ' end if
shr ebx,1 ' v Shr= 1
jnz _ODO1_BEGIN ' Loop While (v <> 0)
shl eax,cl ' \
mov [FUNCTION],eax ' | Return (u Shl sft)
_OGCD_END_: ' /
end asm
End Function
dim as integer RESU
'--------------------------------------------------
#define Counter_Start Counter_Begin( 1000, HIGH_PRIORITY_CLASS )
'#define PARM &h22000,&hFF0000
#define PARM 123,456
Counter_Start
RESU = gcd( PARM )
Counter_End
Print counter_cycles; " cycles",,RESU
Counter_Start
RESU = gcd_asm( PARM )
Counter_End
Print counter_cycles; " cycles (asm)",RESU
Counter_Start
RESU = gcd_asm_optimized( PARM )
Counter_End
Print counter_cycles; " cycles (asm optimized)",RESU
sleep
and so... my results here:
Code: Select all
128 cycles 3
62 cycles (asm) 3
51 cycles (asm optimized) 3