Unfortunately, it's almost 4 times slower than FB's native division.
Then I thought: why not use the FPU?
The problem is that you can't directly load a 64-bit unsigned integer into the FPU registers, but there's a simple solution: load it as a signed integer and add 2^64 when the top bit was set.
Using the FPU for ULongInt division is actually a bit faster than native FB. I present to you both the intriguing but slow double-precision versions and the FPU version.
Code: Select all
#cmdline "-asm intel -arch native -gen gcc -Wc -O3"
Extern "C"
Declare Function round(Byval As Double) As Double
Declare Function fma( Byval x As Double, Byval y As Double, Byval z As Double ) As Double
End Extern
' Branch-free unsigned 64-bit division a \ b built on double-precision math.
'
' A single-precision reciprocal of b is used as a seed, refined with one
' Newton-Raphson step (two fused multiply-adds). The quotient is then built
' in two passes: a coarse estimate from the seed, plus a signed correction
' computed from the residual with the refined reciprocal.
'
' Divisors that the floating-point path is not used for are selected at the
' very end without branching around the arithmetic:
'   b <= 1       -> result is a (this includes b = 0; no error is raised)
'   b >= 2^63    -> result is 1 when a >= b, otherwise 0
Function div64dbl(Byval a As Ulongint, Byval b As Ulongint) As Ulongint
	' Operands as doubles.
	Dim numer As Double = Cdbl(a)
	Dim denom As Double = Cdbl(b)

	' Single-precision reciprocal seed, widened back to double.
	Dim denomS As Single = Csng(denom)
	Dim seedS As Single = 1.0f / denomS
	Dim seed As Double = Cdbl(seedS)

	' One Newton-Raphson refinement: refined = seed + (1 - b*seed) * seed.
	Dim errTerm As Double = fma(-denom, seed, 1.0)
	Dim refined As Double = fma(errTerm, seed, seed)

	' Pass 1: coarse quotient from the seed and its (signed) residual.
	Dim coarseQ As Ulongint = round(numer * seed)
	Dim resid1 As Longint = a - (b * coarseQ)

	' Pass 2: correction term from the residual using the refined reciprocal.
	Dim fixQ As Longint = round(Cdbl(resid1) * refined)
	Dim resid2 As Longint = resid1 - (b * fixQ)

	' round() may land one too high; a negative residual reveals that.
	Dim adjust As Longint = fixQ
	If resid2 < 0 Then adjust = fixQ - 1

	Dim fullQ As Ulongint = coarseQ + adjust

	' Special divisors, evaluated unconditionally and selected at the end.
	Dim divisorHuge As boolean = (Clngint(b) < 0)
	Dim divisorTrivial As boolean = (b <= 1)
	Dim hugeResult As Ulongint = -(a >= b)
	Dim shortcut As Ulongint = Iif(divisorHuge, hugeResult, a)

	Return Iif(divisorTrivial Orelse divisorHuge, shortcut, fullQ)
End Function
' Unsigned 64-bit division a \ b via double-precision math, with the special
' divisors handled by early returns before any floating-point work is done.
'
'   b <= 1       -> returns a (this includes b = 0; no error is raised)
'   b >= 2^63    -> returns 1 when a >= b, otherwise 0
'
' For every other divisor the quotient is a coarse estimate obtained from a
' single-precision reciprocal, plus a signed correction computed from the
' residual with a Newton-Raphson-refined reciprocal.
Private Function div64dbl2(Byval a As Ulongint, Byval b As Ulongint) As Ulongint
	' Guard clauses for divisors the reciprocal path is not used for.
	If b <= 1 Then Return a
	If Clngint(b) < 0 Then Return -(a >= b)

	' Operands as doubles.
	Dim numer As Double = Cdbl(a)
	Dim denom As Double = Cdbl(b)

	' Single-precision reciprocal seed, widened back to double.
	Dim denomS As Single = Csng(denom)
	Dim seedS As Single = 1.0f / denomS
	Dim seed As Double = Cdbl(seedS)

	' One Newton-Raphson refinement: refined = seed + (1 - b*seed) * seed.
	Dim errTerm As Double = fma(-denom, seed, 1.0)
	Dim refined As Double = fma(errTerm, seed, seed)

	' Pass 1: coarse quotient from the seed and its (signed) residual.
	Dim coarseQ As Ulongint = round(numer * seed)
	Dim resid1 As Longint = a - (b * coarseQ)

	' Pass 2: correction term from the residual using the refined reciprocal.
	Dim fixQ As Longint = round(Cdbl(resid1) * refined)
	Dim resid2 As Longint = resid1 - (b * fixQ)

	' round() may land one too high; a negative residual reveals that.
	Dim adjust As Longint = fixQ
	If resid2 < 0 Then adjust = fixQ - 1

	Return coarseQ + adjust
End Function
' Unsigned 64-bit division n \ m carried out on the x87 FPU.
'
' The x87 can only load *signed* 64-bit integers (fild). Each operand is
' therefore loaded as signed and, when its top bit is set (so fild saw a
' negative number), 2^64 is added to recover the unsigned value. The
' quotient is stored back with the rounding mode forced to truncation so
' the result matches integer division.
'
' Parameters (both passed by reference; only read):
'   n - dividend
'   m - divisor
' Returns the truncated quotient.
'
' Fix over the previous revision: the 2^64 constant was addressed as
' "3[rip]", which the assembler reads as the literal displacement rip+3,
' not as local label 3. It is now "3f[rip]" (forward reference to label 3),
' so operands >= 2^63 are corrected with the intended constant.
'
' NOTE(review): fistp stores a signed 64-bit integer, so a quotient >= 2^63
'   (e.g. n >= 2^63 with m = 1) cannot be represented by this routine.
' NOTE(review): the precision-control bits of the control word are left as
'   found; exact results rely on the x87 running in 64-bit-mantissa
'   (extended) precision at entry - confirm for the target runtime.
' NOTE(review): oldcw/cw are Static, so the routine is not reentrant.
Function div64fpu( Byref n As Ulongint, Byref m As Ulongint ) As Ulongint
	Static As Ushort oldcw, cw
	Asm
		' Save the caller's control word and install a modified copy.
		' &hC00 (bits 10-11) selects round-toward-zero; the remaining
		' bits of &hC99 fall in the exception-mask byte.
		fstcw word Ptr [oldcw]
		mov ax, word Ptr [oldcw]
		Or ax, &hC99
		mov word Ptr [cw], ax
		fldcw word Ptr [cw]

		' Load the dividend; n is Byref, so [n] holds its address.
		mov rax, [n]
		fild qword Ptr [rax]
		' Top bit set -> fild interpreted it as negative: add 2^64.
		test Byte Ptr [rax+7], 128
		jz 1f
		fadd dword Ptr 3f[rip]
		1:

		' Load the divisor the same way.
		mov rax, [m]
		fild qword Ptr [rax]
		test Byte Ptr [rax+7], 128
		jz 2f
		fadd dword Ptr 3f[rip]
		2:

		' st(1) = n / m, pop; then store the truncated quotient as the result.
		fdivp st(1), st(0)
		fistp qword Ptr [Function]

		' Restore the caller's control word.
		fldcw word Ptr [oldcw]

		' Skip over the inline constant below.
		jmp 4f
		' 2^64 as an IEEE-754 single (exponent 191 - 127 = 64, mantissa 0).
		3: .long &h5F800000
		4:
	End Asm
End Function
' ---------------------------------------------------------------------------
' Benchmark driver.
'
' Divides the same dividend n by every divisor 1..BENCH_ITERATIONS with each
' of the three routines and with FreeBASIC's native integer division, timing
' each run. The sum of the quotients (x) is printed after every run so the
' four results can be compared by eye; the timings are then reported
' relative to the native division.
' ---------------------------------------------------------------------------
Dim As Ulongint r, n, m, i, x
Dim As Double t1, t2, t3, t4

Const BENCH_ITERATIONS = 500000000

n = 4294967295555555ull

' --- Run 1: branch-free double-precision routine ---------------------------
x = 0
t1 = Timer
For i = 1 To BENCH_ITERATIONS
	r = div64dbl(n, i)
	x += r
Next
t1 = Timer - t1
Print x
Print "div64dbl time = ";t1
Print

' --- Run 2: double-precision routine with early returns --------------------
x = 0
t2 = Timer
For i = 1 To BENCH_ITERATIONS
	r = div64dbl2(n, i)
	x += r
Next
t2 = Timer - t2
Print x
Print "div64dbl2 time = ";t2
Print

' --- Run 3: x87 FPU routine ------------------------------------------------
x = 0
t3 = Timer
For i = 1 To BENCH_ITERATIONS
	r = div64fpu(n, i)
	x += r
Next
t3 = Timer - t3
Print x
Print "div64fpu time = ";t3
Print

' --- Run 4: native integer division (baseline) -----------------------------
x = 0
t4 = Timer
For i = 1 To BENCH_ITERATIONS
	r = n \ i
	x += r
Next
t4 = Timer - t4
Print x
Print "FB div time = ";t4

' --- Timings relative to the native baseline -------------------------------
Print "div64dbl time / FB time = ";t1/t4
Print "div64dbl2 time / FB time = ";t2/t4
Print "div64fpu time / FB time = ";t3/t4
Print

Print "Press RETURN to end"
Sleep