fast sine and fast cosine

Post your FreeBASIC tips and tricks here. Please don’t post your code without including an explanation.
srvaldez
Posts: 2650
Joined: Sep 25, 2005 21:54

fast sine and fast cosine

Postby srvaldez » Apr 29, 2020 13:19

here I compare the Function _ASM_Sin6th and the Function _ASM_Cos6th by UEZ with equivalent FB-implementations

Code: Select all

'by UEZ
Function _ASM_Sin6th(fX As Double) As Double 'by Eukalyptus
   Asm
      jmp 0f
      1: .Double 683565275.57643158
      2: .Double -0.0000000061763971109087229
      3: .Double 6755399441055744.0
         
      0:
         movq xmm0, [fX]
         mulsd xmm0, [1b]
         addsd xmm0, [3b]
         movd ebx, xmm0

         lea  eax, [ebx*2+0x80000000]
         sar  eax, 2
         imul eax
         sar  ebx, 31
         lea  eax, [edx*2-0x70000000]
         lea  ecx, [edx*8+edx-0x24000000]
         imul edx
         Xor  ecx, ebx
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx

         cvtsi2sd xmm0, edx
         mulsd xmm0, [2b]
         movq [Function], xmm0
   End Asm
End Function

'by UEZ
Function _ASM_Cos6th(fX As Double) As Double 'by Eukalyptus
   Asm
      jmp 0f
         1: .Double 683565275.57643158
         2: .Double -0.0000000061763971109087229
         3: .Double 6755399441055744.0
       
      0:
         movq xmm0, [fX]
         mulsd xmm0, [1b]
         addsd xmm0, [3b]
         movd ebx, xmm0
         
         Add ebx, 0x40000000 'SinToCos
   
         lea  eax, [ebx*2+0x80000000]
         sar  eax, 2
         imul eax
         sar  ebx, 31
         lea  eax, [edx*2-0x70000000]
         lea  ecx, [edx*8+edx-0x24000000]
         imul edx
         Xor  ecx, ebx
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx
         
         cvtsi2sd xmm0, edx
         mulsd xmm0, [2b]
         movq [Function], xmm0
   End Asm
End Function

private function fast_round(byval x as double) as long
   dim MAGIC_ROUND as const double = 6755399441055744.0
   union fast_trunc
      d as double
      type
         lw as long
         hw as long
      end type
   end union
   dim fast_trunc as fast_trunc
   fast_trunc.d = x
   fast_trunc.d += MAGIC_ROUND
   return fast_trunc.lw
end function

private function fast_sin(byval x as double) as double
   static PI as const double = 3.14159265358979323846264338327950288
   static PIhalh as const double = 1.5707963267948966192313216916398
   static INVPI as const double = 0.31830988618379067153776752674502872
   static A as const double = 0.7877963415144895477052689270382604344650e-2
   static B as const double = -0.1664011173315512939367501065243766307762
   static C as const double = 0.9999769956705319409897753213423989053597
   dim k as long
   dim x2 as double
   k = fast_round(INVPI * x)
   x -= k * PI
   x2 = x * x
   x = x * (C + (x2 * (B + (A * x2))))
   if k mod 2 then
      x = -x
   end if
   return x
end function

private function fast_sin2(byval x as double) as double
   static PI as const double = 3.14159265358979323846264338327950288
   static PIhalh as const double = 1.5707963267948966192313216916398
   static INVPI as const double = 0.31830988618379067153776752674502872

   static A as const double = -.1540557313583959620200034139475826213673
   static B as const double = 0.9975701467242784884900775894558674706250
   dim k as long
   dim x2 as double
   k = fast_round(INVPI * x)
   x -= k * PI
   x2 = x * x
   x = x * (B + (A * x2))
   if k mod 2 then
      x = -x
   end if
   return x
end function

private function fast_cos(byval x as double) as double
   static PI as const double = 3.14159265358979323846264338327950288
   static PIhalh as const double = 1.5707963267948966192313216916398
   static INVPI as const double = 0.31830988618379067153776752674502872
   static A as const double = 0.7877963415144895477052689270382604344650e-2
   static B as const double = -0.1664011173315512939367501065243766307762
   static C as const double = 0.9999769956705319409897753213423989053597

   dim k as long
   dim x2 as double
   x = PIhalh - x
   k = fast_round(INVPI * x)
   x -= k * PI
   x2 = x * x
   x = x * (C + (x2 * (B + (A * x2))))
   if k mod 2 then
      x = -x
   end if
   return x
end function

private function fast_cos2(byval x as double) as double
   static PI as const double = 3.14159265358979323846264338327950288
   static PIhalh as const double = 1.5707963267948966192313216916398
   static INVPI as const double = 0.31830988618379067153776752674502872

   static A as const double = -.1540557313583959620200034139475826213673
   static B as const double = 0.9975701467242784884900775894558674706250
   dim k as long
   dim x2 as double
   x = PIhalh - x
   k = fast_round(INVPI * x)
   x -= k * PI
   x2 = x * x
   x = x * (B + (A * x2))
   if k mod 2 then
      x = -x
   end if
   return x
end function

Print "just now starting"
dim as double s, t
dim as double x
dim as integer k

s=0
t=timer
for k=1 to 100000000
   x=k
   s+=_ASM_Sin6th(x)
next
t=timer-t
Print "_ASM_Sin6th time: seconds = ";
Print using "##.###  sum = ##.####";t;s;: Print " true 1.713649346570576"

s=0
t=timer
for k=1 to 100000000
   x=k
   s+=_ASM_Cos6th(x)
next
t=timer-t
Print "_ASM_Cos6th time: seconds = ";
Print using "##.###  sum = ##.####";t;s;: Print " true 0.1709843554183985"

Print "------------------------"
s=0
t=timer
for k=1 to 100000000
   x=k
   s+=fast_sin(x)
next
t=timer-t
Print "fast_sin time: seconds = ";
Print using "##.###  sum = ##.####";t;s;: Print " true 1.713649346570576"

s=0
t=timer
for k=1 to 100000000
   x=k
   s+=fast_cos(x)
next
t=timer-t
Print "fast_cos time: seconds = ";
Print using "##.###  sum = ##.####";t;s;: Print " true 0.1709843554183985"

Print "------------------------"
s=0
t=timer
for k=1 to 100000000
   x=k
   s+=fast_sin2(x)
next
t=timer-t
Print "fast_sin2 time: seconds = ";
Print using "##.###  sum = ##.####";t;s;: Print " true 1.713649346570576"

s=0
t=timer
for k=1 to 100000000
   x=k
   s+=fast_cos2(x)
next
t=timer-t
Print "fast_cos2 time: seconds = ";
Print using "##.###  sum = ##.####";t;s;: Print " true 0.1709843554183985"

Print "------------------------"
s=0
t=timer
for k=1 to 100000000
   x=k
   s+=sin(x)
next
t=timer-t
Print "FB built-in sin time: seconds = ";
Print using "##.###  sum = ##.###############";t;s;: Print " true 1.713649346570576"

s=0
t=timer
for k=1 to 100000000
   x=k
   s+=cos(x)
next
t=timer-t
Print "FB built-in cos time: seconds = ";
Print using "##.###  sum = ##.###############";t;s;: Print " true 0.1709843554183985"

output on my PC

Code: Select all

just now starting
_ASM_Sin6th time: seconds =  0.393  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  0.408  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  0.156  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  0.181  sum =  0.1707 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  0.138  sum =  1.7187 true 1.713649346570576
fast_cos2 time: seconds =  0.158  sum =  0.1785 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  2.841  sum =  1.713649346570280 true 1.713649346570576
FB built-in cos time: seconds =  2.870  sum =  0.170984355418202 true 0.1709843554183985
Last edited by srvaldez on Apr 29, 2020 14:25, edited 1 time in total.
Imortis
Moderator
Posts: 1744
Joined: Jun 02, 2005 15:10
Location: USA
Contact:

Re: fast sine and fast cosine

Postby Imortis » Apr 29, 2020 13:29

How do those stack up against the built in sine and cosine functions in FB?
srvaldez
Posts: 2650
Joined: Sep 25, 2005 21:54

Re: fast sine and fast cosine

Postby srvaldez » Apr 29, 2020 14:27

@Imortis
I edited my first post to include the FB built-in sine and cosine functions
caseih
Posts: 1614
Joined: Feb 26, 2007 5:32

Re: fast sine and fast cosine

Postby caseih » Apr 29, 2020 15:28

How do the optimized routines from the widely-used gsl library stack up? FB ships with gsl headers.
UEZ
Posts: 688
Joined: May 05, 2017 19:59
Location: Germany

Re: fast sine and fast cosine

Postby UEZ » Apr 29, 2020 15:33

Here my results on Win10 x64 (AMD Ryzen 5 PRO 3500U w/ Radeon Vega Mobile Gfx).

x86 (w/o any compiler settings):

Code: Select all

just now starting
_ASM_Sin6th time: seconds =  2.588  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  2.651  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  6.797  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  7.671  sum =  0.1706 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  6.475  sum =  1.7199 true 1.713649346570576
fast_cos2 time: seconds =  6.995  sum =  0.1794 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  3.384  sum =  1.713649346571280 true 1.713649346570576
FB built-in cos time: seconds =  3.359  sum =  0.170984355418203 true 0.1709843554183985


x64 (w/o any compiler settings):

Code: Select all

just now starting
_ASM_Sin6th time: seconds =  1.264  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  1.239  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  2.469  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  2.769  sum =  0.1707 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  2.423  sum =  1.7187 true 1.713649346570576
fast_cos2 time: seconds =  2.534  sum =  0.1785 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  6.427  sum =  1.713649346570082 true 1.713649346570576
FB built-in cos time: seconds =  6.278  sum =  0.170984355418414 true 0.1709843554183985


x86 (-gen gcc -Wc -Ofast):

Code: Select all

just now starting
_ASM_Sin6th time: seconds =  0.526  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  0.495  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  0.741  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  0.796  sum =  0.1706 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  0.620  sum =  1.7199 true 1.713649346570576
fast_cos2 time: seconds =  0.669  sum =  0.1794 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  3.075  sum =  1.713649346570662 true 1.713649346570576
FB built-in cos time: seconds =  3.128  sum =  0.170984355418296 true 0.1709843554183985


x64 (-gen gcc -Wc -Ofast):

Code: Select all

just now starting
_ASM_Sin6th time: seconds =  0.400  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  0.385  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  0.959  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  0.407  sum =  0.1707 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  0.966  sum =  1.7187 true 1.713649346570576
fast_cos2 time: seconds =  0.330  sum =  0.1785 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  5.972  sum =  1.713649346570082 true 1.713649346570576
FB built-in cos time: seconds =  5.957  sum =  0.170984355418414 true 0.1709843554183985


x86 (-gen gcc -Wc -Ofast -Wc -march=native -Wc -funroll-loops -Wc -mfpmath=sse):

Code: Select all

just now starting
_ASM_Sin6th time: seconds =  0.423  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  0.420  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  0.247  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  0.273  sum =  0.1707 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  0.205  sum =  1.7187 true 1.713649346570576
fast_cos2 time: seconds =  0.229  sum =  0.1785 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  7.178  sum =  1.713649346570283 true 1.713649346570576
FB built-in cos time: seconds =  7.376  sum =  0.170984355418203 true 0.1709843554183985


x64 (-gen gcc -Wc -Ofast -Wc -march=native -Wc -funroll-loops -Wc -mfpmath=sse):

Code: Select all

just now starting
_ASM_Sin6th time: seconds =  0.395  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  0.379  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  0.937  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  0.975  sum =  0.1707 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  0.697  sum =  1.7187 true 1.713649346570576
fast_cos2 time: seconds =  0.584  sum =  0.1785 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  6.155  sum =  1.713649346570082 true 1.713649346570576
FB built-in cos time: seconds =  6.145  sum =  0.170984355418414 true 0.1709843554183985
srvaldez
Posts: 2650
Joined: Sep 25, 2005 21:54

Re: fast sine and fast cosine

Postby srvaldez » Apr 29, 2020 16:11

@UEZ
obviously the Intel and AMD CPU's perform differently, here are my times FBx64 using fbc -arch native -asm intel -gen gcc -Wc -Ofast
thank you for testing

Code: Select all

_ASM_Sin6th time: seconds =  0.3319944000104442          29.19713395039345          true 1.713649346570576
_ASM_Cos6th time: seconds =  0.3561769000079949          9.347239003491984          true 0.1709843554183985
------------------------
fast_sin time: seconds =  0.152  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  0.173  sum =  0.1707 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  0.136  sum =  1.7187 true 1.713649346570576
fast_cos2 time: seconds =  0.155  sum =  0.1785 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  2.974  sum =  1.713649346570280 true 1.713649346570576
FB built-in cos time: seconds =  2.972  sum =  0.170984355418202 true 0.1709843554183985

@caseih
I will test and report back
[edit] can't get the latest gsl to pass the tests, will keep trying
srvaldez
Posts: 2650
Joined: Sep 25, 2005 21:54

Re: fast sine and fast cosine

Postby srvaldez » Apr 29, 2020 19:10

@caseih
it din't fare too well

Code: Select all

gsl_sf_sin time: seconds =  5.952  sum =  1.713649346570745 true 1.713649346570576
gsl_sf_cos time: seconds =  5.896  sum =  0.170984355417845 true 0.1709843554183985
jj2007
Posts: 1962
Joined: Oct 23, 2016 15:28
Location: Roma, Italia
Contact:

Re: fast sine and fast cosine

Postby jj2007 » Apr 30, 2020 5:59

Core i5 on Win7-64:

Code: Select all

just now starting
_ASM_Sin6th time: seconds =  2.509  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  2.518  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  6.066  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  6.064  sum =  0.1706 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  5.484  sum =  1.7199 true 1.713649346570576
fast_cos2 time: seconds =  5.777  sum =  0.1794 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  3.925  sum =  1.713649346571278 true 1.713649346570576
FB built-in cos time: seconds =  3.820  sum =  0.170984355418200 true 0.1709843554183985

Perhaps a slightly different setup would be more realistic:

Code: Select all

Print "------------------------"
s=0
iterations=0
#define innerloops 100000
t=timer
for k=0 to 359
   x=k*0.01745329251994329576923690768489   ' degrees to rad
   for inner=1 to innerloops
    s+=sin(x)
    iterations+=1
   next
next
t=timer-t
Print "FB built-in sin time: seconds = ";
Print using "##.###  sum = ##.###############";t;s;: Print " with";iterations/1000000;" Million iterations"
Output:

Code: Select all

FB built-in sin time: seconds =  1.404  sum =  0.000000000548384 with 36 Million iterations
Note the expected sum is zero.
srvaldez
Posts: 2650
Joined: Sep 25, 2005 21:54

Re: fast sine and fast cosine

Postby srvaldez » Apr 30, 2020 11:54

Hi jj2007
please try with the following compile command: fbc -gen gcc -Wc -O2
the times on my PC using that command are
32-bit

Code: Select all

_ASM_Sin6th time: seconds =  1.452  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  1.463  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  0.312  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  0.310  sum =  0.1706 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  0.252  sum =  1.7199 true 1.713649346570576
fast_cos2 time: seconds =  0.261  sum =  0.1794 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  2.882  sum =  1.713649346570280 true 1.713649346570576
FB built-in cos time: seconds =  2.909  sum =  0.170984355418202 true 0.1709843554183985

64-bit

Code: Select all

_ASM_Sin6th time: seconds =  0.413  sum = 29.1971 true 1.713649346570576
_ASM_Cos6th time: seconds =  0.417  sum =  9.3472 true 0.1709843554183985
------------------------
fast_sin time: seconds =  0.235  sum =  1.7134 true 1.713649346570576
fast_cos time: seconds =  0.259  sum =  0.1707 true 0.1709843554183985
------------------------
fast_sin2 time: seconds =  0.191  sum =  1.7187 true 1.713649346570576
fast_cos2 time: seconds =  0.214  sum =  0.1785 true 0.1709843554183985
------------------------
FB built-in sin time: seconds =  3.020  sum =  1.713649346570280 true 1.713649346570576
FB built-in cos time: seconds =  3.004  sum =  0.170984355418202 true 0.1709843554183985

@jj
your sum would indeed sum-up to 0, but then you only have a very small range of values
jj2007
Posts: 1962
Joined: Oct 23, 2016 15:28
Location: Roma, Italia
Contact:

Re: fast sine and fast cosine

Postby jj2007 » Apr 30, 2020 15:41

srvaldez wrote:your sum would indeed sum-up to 0, but then you only have a very small range of values
Which values other than 0..360° are relevant?
angros47
Posts: 1822
Joined: Jun 21, 2005 19:04

Re: fast sine and fast cosine

Postby angros47 » Apr 30, 2020 16:36

If you want a very fast way to get sine and cosine, you can just pre-calculate a lookup table, with the desired precision (in the software midi synthesizer used in my sound library that solution is used)
srvaldez
Posts: 2650
Joined: Sep 25, 2005 21:54

Re: fast sine and fast cosine

Postby srvaldez » Apr 30, 2020 20:54

@UEZ
I modified your sine and cosine to naked functions, there may be a small speed increase but what's more important is that it compiles without complaint whereas your originals failed to compile with gcc-10 and with -Ofast

Code: Select all

Function _ASM_Sin6th naked cdecl(byval fX As Double) As Double
   'By Eukalyptus
   Asm
   ' if FB-32-bit, then load fx from stack, else it's already in xmm0
   ' ebx/rbx needs to be preserved, not sure about ecx/rcx
      #ifndef __FB_64BIT__
         lea eax, [esp+4]
         push ebx
         push ecx
         movq xmm0, [eax]
      #else
         push rbx
         push rcx
      #endif
         mulsd xmm0, [1f]
         addsd xmm0, [3f]
         movd ebx, xmm0

         lea  eax, [ebx*2+0x80000000]
         sar  eax, 2
         imul eax
         sar  ebx, 31
         lea  eax, [edx*2-0x70000000]
         lea  ecx, [edx*8+edx-0x24000000]
         imul edx
         Xor  ecx, ebx
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx

         cvtsi2sd xmm0, edx
         mulsd xmm0, [2f]
      ' if FB-32-bit, then transfer xmm0 into fpu, else we are done
      ' restore saved registers
      #ifndef __FB_64BIT__
         pop ecx
         pop ebx
         movq [esp-12], xmm0
         fld qword ptr [esp-12]
      #else
         pop rcx
         pop rbx
      #endif
         ret
      1: .Double 683565275.57643158
      2: .Double -0.0000000061763971109087229
      3: .Double 6755399441055744.0
   End Asm
End Function

Function _ASM_Cos6th naked cdecl(byval fX As Double) As Double
   'By Eukalyptus
   Asm
   ' if FB-32-bit, then load fx from stack, else it's already in xmm0
   ' ebx/rbx needs to be preserved, not sure about ecx/rcx
      #ifndef __FB_64BIT__
         lea eax, [esp+4]
         push ebx
         push ecx
         movq xmm0, [eax]
      #else
         push rbx
         push rcx
      #endif
         mulsd xmm0, [1f]
         addsd xmm0, [3f]
         movd ebx, xmm0

         Add ebx, 0x40000000 'SinToCos

         lea  eax, [ebx*2+0x80000000]
         sar  eax, 2
         imul eax
         sar  ebx, 31
         lea  eax, [edx*2-0x70000000]
         lea  ecx, [edx*8+edx-0x24000000]
         imul edx
         Xor  ecx, ebx
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx

         cvtsi2sd xmm0, edx
         mulsd xmm0, [2f]
      ' if FB-32-bit, then transfer xmm0 into fpu, else we are done
      ' restore saved registers
      #ifndef __FB_64BIT__
         pop ecx
         pop ebx
         movq [esp-12], xmm0
         fld qword ptr [esp-12]
      #else
         pop rcx
         pop rbx
      #endif
         ret
      1: .Double 683565275.57643158
      2: .Double -0.0000000061763971109087229
      3: .Double 6755399441055744.0
   End Asm
End Function
UEZ
Posts: 688
Joined: May 05, 2017 19:59
Location: Germany

Re: fast sine and fast cosine

Postby UEZ » May 01, 2020 11:55

@srvaldez: thank you, it will be useful to have the naked sexy version, too. ^^

The functions are written by Eukalyptus, not by me!
dodicat
Posts: 6809
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: fast sine and fast cosine

Postby dodicat » May 01, 2020 15:56

Could you check your workings, or am I using the functions wrongly?

Code: Select all

Function _ASM_Sin6th Naked Cdecl(Byval fX As Double) As Double
   'By Eukalyptus
   Asm
   ' if FB-32-bit, then load fx from stack, else it's already in xmm0
   ' ebx/rbx needs to be preserved, not sure about ecx/rcx
      #ifndef __FB_64BIT__
         lea eax, [esp+4]
         push ebx
         push ecx
         movq xmm0, [eax]
      #Else
         push rbx
         push rcx
      #endif
         mulsd xmm0, [1f]
         addsd xmm0, [3f]
         movd ebx, xmm0

         lea  eax, [ebx*2+0x80000000]
         sar  eax, 2
         imul eax
         sar  ebx, 31
         lea  eax, [edx*2-0x70000000]
         lea  ecx, [edx*8+edx-0x24000000]
         imul edx
         Xor  ecx, ebx
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx

         cvtsi2sd xmm0, edx
         mulsd xmm0, [2f]
      ' if FB-32-bit, then transfer xmm0 into fpu, else we are done
      ' restore saved registers
      #ifndef __FB_64BIT__
         pop ecx
         pop ebx
         movq [esp-12], xmm0
         fld qword Ptr [esp-12]
      #Else
         pop rcx
         pop rbx
      #endif
         ret
      1: .Double 683565275.57643158
      2: .Double -0.0000000061763971109087229
      3: .Double 6755399441055744.0
   End Asm
End Function

Function _ASM_Cos6th Naked Cdecl(Byval fX As Double) As Double
   'By Eukalyptus
   Asm
   ' if FB-32-bit, then load fx from stack, else it's already in xmm0
   ' ebx/rbx needs to be preserved, not sure about ecx/rcx
      #ifndef __FB_64BIT__
         lea eax, [esp+4]
         push ebx
         push ecx
         movq xmm0, [eax]
      #Else
         push rbx
         push rcx
      #endif
         mulsd xmm0, [1f]
         addsd xmm0, [3f]
         movd ebx, xmm0

         Add ebx, 0x40000000 'SinToCos

         lea  eax, [ebx*2+0x80000000]
         sar  eax, 2
         imul eax
         sar  ebx, 31
         lea  eax, [edx*2-0x70000000]
         lea  ecx, [edx*8+edx-0x24000000]
         imul edx
         Xor  ecx, ebx
         lea  eax, [edx*8+edx+0x44A00000]
         imul ecx

         cvtsi2sd xmm0, edx
         mulsd xmm0, [2f]
      ' if FB-32-bit, then transfer xmm0 into fpu, else we are done
      ' restore saved registers
      #ifndef __FB_64BIT__
         pop ecx
         pop ebx
         movq [esp-12], xmm0
         fld qword Ptr [esp-12]
      #Else
         pop rcx
         pop rbx
      #endif
         ret
      1: .Double 683565275.57643158
      2: .Double -0.0000000061763971109087229
      3: .Double 6755399441055744.0
   End Asm
End Function

#macro SinCosDbl (Angle,Sin_,Cos_)
Asm
  fld qword Ptr [ Angle ]
  fsincos               
  fstp qword Ptr [Cos_]
  fstp qword Ptr [Sin_]
End Asm
#endmacro

dim as long lim=10000000
dim as double acc,stp=.89
dim as double s,c,t
acc=0
t=timer
for n as double=-lim to lim step stp
    s=_ASM_Sin6th(n)
    c=_ASM_Cos6th(n)
    acc+=s+c
    next
    print timer-t,acc,"Naked"
   
    acc=0
    t=timer
for n as double=-lim to lim step stp
    SinCosDbl(n,s,c)
    acc+=s+c
    next
    print timer-t,acc,"Sincos"
   
      acc=0
     
    t=timer
for n as double=-lim to lim step stp
    s=sin(n)
    c=cos(n)
    acc+=s+c
    next
    print timer-t,acc,"FB"
    sleep
   


 
srvaldez
Posts: 2650
Joined: Sep 25, 2005 21:54

Re: fast sine and fast cosine

Postby srvaldez » May 01, 2020 16:18

Hi dodicat
you exceeded the range of the asm implementations, the max for lim is about 1000000, and the precision is only about 2 or 3 digits

Return to “Tips and Tricks”

Who is online

Users browsing this forum: No registered users and 4 guests