My most recent 32-bit macros (as previously posted, but with some minor changes):
Code: Select all
''=============================================================================
#include "windows.bi"
''=============================================================================
'' These two macros provide a convenient method of measuring the processor
'' clock-cycle count for a block of code. The macros must be called in pairs,
'' and the block of code, or a call to a procedure containing the block of
'' code, must be placed between the counter_begin and counter_end macro calls.
'' The average per-loop cycle count, corrected for the loop overhead, is
'' returned in the global variable counter_cycles.
''
'' I provided access to the process priority class and the thread priority to
'' make it possible to operate at the highest possible priority by using the
'' combination of REALTIME_PRIORITY_CLASS and THREAD_PRIORITY_TIME_CRITICAL.
'' On a multi-core system (or even a P4 with HT) running Windows XP, doing so
'' appears to be reasonably safe, even if the code being timed triggers an
'' exception. But note that running such a high priority on a single-core
'' system can cause the system to hang.
''
'' Note that CPUID will alter the value of EBX.
''=============================================================================
'' Result of the last COUNTER_BEGIN/COUNTER_END pair. COUNTER_END stores the
'' raw 64-bit cycle total here as two dwords and then divides it by the loop
'' count, leaving the average cycles per iteration.
dim shared counter_cycles as longint
'' Requested iteration count, and the working counter that the asm loops
'' decrement down to zero.
dim shared _loop_count_ as integer
dim shared _loop_counter_ as integer
'' Priorities in effect before COUNTER_BEGIN changed them; COUNTER_END
'' restores them.
dim shared _process_priority_class_ as integer
dim shared _thread_priority_ as integer
'' COUNTER_BEGIN( loop_count, process_priority, thread_priority )
''   loop_count       - number of times the timed code is executed; must be at
''                      least 1 because both loops decrement before testing
''   process_priority - priority class passed to SetPriorityClass
''   thread_priority  - thread priority passed to SetThreadPriority
'' Saves the current priorities, applies the requested ones, times an empty
'' loop of loop_count iterations to obtain the loop overhead, then records the
'' start count of the real run and opens the timed loop at local label 1.
'' On exit four dwords are left on the stack (start count on top, overhead
'' below it); the matching COUNTER_END pops them. The asm block modifies EAX,
'' EBX, ECX and EDX.
#macro COUNTER_BEGIN( loop_count, process_priority, thread_priority )
_loop_count_ = loop_count
'' Remember the current priorities so that COUNTER_END can restore them.
_process_priority_class_ = GetPriorityClass(GetCurrentProcess())
_thread_priority_ = GetThreadPriority(GetCurrentThread())
SetPriorityClass(GetCurrentProcess(), process_priority)
SetThreadPriority(GetCurrentThread(), thread_priority)
_loop_counter_ = _loop_count_
asm
'' Reference run: CPUID (a serializing instruction) followed by RDTSC, with
'' the count saved high dword first so that the low dword ends up on top.
xor eax, eax
cpuid
rdtsc
push edx
push eax
xor eax, eax
cpuid
'' Empty loop using the same decrement-and-branch as the timed loop.
.balign 16
0:
sub DWORD PTR _loop_counter_, 1
jnz 0b
'' EDX:EAX = end count - saved start count = cycles used by the empty loop.
xor eax, eax
cpuid
rdtsc
pop ecx
sub eax, ecx
pop ecx
sbb edx, ecx
'' Leave the overhead on the stack for COUNTER_END.
push edx
push eax
'' Timed run: save its start count on the stack as well.
xor eax, eax
cpuid
rdtsc
push edx
push eax
'' Reload the working counter for the timed loop.
mov eax, _loop_count_
mov _loop_counter_, eax
xor eax, eax
cpuid
'' Top of the timed loop; COUNTER_END branches back to this label.
.balign 16
1:
end asm
#endmacro
'' COUNTER_END()
'' Closes the timed loop opened by COUNTER_BEGIN, computes the elapsed count
'' minus the empty-loop overhead, restores the saved priorities and leaves the
'' average count per iteration in counter_cycles. It pops the four dwords that
'' COUNTER_BEGIN left on the stack, so the timed code must leave the stack
'' pointer where it found it. The asm block modifies EAX, EBX, ECX and EDX.
#macro COUNTER_END()
asm
'' Bottom of the timed loop: repeat until _loop_counter_ reaches zero.
sub DWORD PTR _loop_counter_, 1
jnz 1b
'' EDX:EAX = end count - start count pushed by COUNTER_BEGIN.
xor eax, eax
cpuid
rdtsc
pop ecx
sub eax, ecx
pop ecx
sbb edx, ecx
'' Subtract the empty-loop overhead pushed by COUNTER_BEGIN.
pop ecx
sub eax, ecx
pop ecx
sbb edx, ecx
'' Store the 64-bit total, low dword first.
mov DWORD PTR [counter_cycles], eax
mov DWORD PTR [counter_cycles+4], edx
end asm
'' Put the process and thread back to the priorities saved by COUNTER_BEGIN.
SetPriorityClass(GetCurrentProcess(),_process_priority_class_)
SetThreadPriority(GetCurrentThread(),_thread_priority_)
'' Convert the total into the average per iteration.
counter_cycles /= _loop_count_
#endmacro
''=============================================================================
Code: Select all
''=============================================================================
'' This is for FreeBASIC Compiler Version 1.03.0 (07-01-2015), built for win64
''=============================================================================
'' These two macros, which are coded to be -gen gcc / FB 64-bit compatible,
'' provide a convenient method of measuring the processor clock-cycle count
'' for a block of code. The macros must be called in pairs, and the block
'' of code, or a call to a procedure containing the block of code, must be
'' placed between the counter_begin and counter_end macro calls. The average
'' per-loop cycle count, corrected for the loop overhead, is returned in the
'' global variable counter_cycles.
''
'' I provided access to the process priority class and the thread priority to
'' make it possible to operate at the highest possible priority by using the
'' combination of REALTIME_PRIORITY_CLASS and THREAD_PRIORITY_TIME_CRITICAL.
'' On a multi-core system (or even a P4 with HT) running Windows XP, doing so
'' appears to be reasonably safe, even if the code being timed triggers an
'' exception. But note that running such a high priority on a single-core
'' system can cause the system to hang.
''
'' The loops and the cycle-count calculations are done entirely in assembly
'' to avoid problems with compiler optimizations breaking the code.
''
'' Note that CPUID will alter the value of EBX.
''=============================================================================
'' Result of the last COUNTER_BEGIN/COUNTER_END pair. COUNTER_END writes the
'' total as two dwords at offsets 0 and 4, so this variable must be at least
'' 8 bytes wide (Integer is pointer-sized, i.e. 8 bytes, on the win64 target
'' named in the header above).
dim shared counter_cycles as integer
'' Requested iteration count, and the working counter that the asm loops
'' decrement down to zero. Both are accessed as dwords by the asm code.
dim shared _loop_count_ as long
dim shared _loop_counter_ as long
'' Priorities in effect before COUNTER_BEGIN changed them; COUNTER_END
'' restores them.
dim shared _process_priority_class_ as DWORD
dim shared _thread_priority_ as long
'' COUNTER_BEGIN( loop_count, process_priority, thread_priority )
''   loop_count       - number of times the timed code is executed; must be at
''                      least 1 because both loops decrement before testing
''   process_priority - priority class passed to SetPriorityClass
''   thread_priority  - thread priority passed to SetThreadPriority
'' Saves the current priorities, applies the requested ones, times an empty
'' loop of loop_count iterations to obtain the loop overhead, then records the
'' start count of the real run and opens the timed loop at local label 1.
'' On exit four qwords (32 bytes) are left on the stack, start count on top
'' and overhead below it; the matching COUNTER_END pops them. The asm block
'' modifies RAX, RBX, RCX and RDX.
'' NOTE(review): the timed code runs with those 32 bytes at the top of the
'' stack. Confirm that nothing in it addresses data relative to RSP or lets a
'' callee write into that area (e.g. Win64 shadow space).
#macro COUNTER_BEGIN( loop_count, process_priority, thread_priority )
_loop_count_ = loop_count
'' Remember the current priorities so that COUNTER_END can restore them.
_process_priority_class_ = GetPriorityClass(GetCurrentProcess())
_thread_priority_ = GetThreadPriority(GetCurrentThread())
SetPriorityClass(GetCurrentProcess(), process_priority)
SetThreadPriority(GetCurrentThread(), thread_priority)
_loop_counter_ = _loop_count_
asm
'' Reference run: CPUID (a serializing instruction) followed by RDTSC. RDTSC
'' returns the count in EDX:EAX; each half is saved as a full qword, high
'' half first so that the low half ends up on top.
xor eax, eax
cpuid
rdtsc
push rdx
push rax
xor eax, eax
cpuid
'' Empty loop using the same decrement-and-branch as the timed loop.
.balign 16
0:
sub DWORD PTR [_loop_counter_], 1
jnz 0b
'' EDX:EAX = end count - saved start count = cycles used by the empty loop.
'' Only the low dword (ECX) of each popped qword takes part in the arithmetic.
xor eax, eax
cpuid
rdtsc
pop rcx
sub eax, ecx
pop rcx
sbb edx, ecx
'' Leave the overhead on the stack for COUNTER_END.
push rdx
push rax
'' Timed run: save its start count on the stack as well.
xor eax, eax
cpuid
rdtsc
push rdx
push rax
'' Reload the working counter for the timed loop.
mov eax, _loop_count_
mov _loop_counter_, eax
xor eax, eax
cpuid
'' Top of the timed loop; COUNTER_END branches back to this label.
.balign 16
1:
end asm
#endmacro
'' COUNTER_END
'' Closes the timed loop opened by COUNTER_BEGIN, computes the elapsed count
'' minus the empty-loop overhead, restores the saved priorities and leaves the
'' average count per iteration in counter_cycles. It pops the four qwords that
'' COUNTER_BEGIN left on the stack, so the timed code must leave the stack
'' pointer where it found it. The asm block modifies RAX, RBX, RCX and RDX.
#macro COUNTER_END
asm
'' Bottom of the timed loop: repeat until _loop_counter_ reaches zero.
sub DWORD PTR [_loop_counter_], 1
jnz 1b
'' EDX:EAX = end count - start count pushed by COUNTER_BEGIN.
xor eax, eax
cpuid
rdtsc
pop rcx
sub eax, ecx
pop rcx
sbb edx, ecx
'' Subtract the empty-loop overhead pushed by COUNTER_BEGIN.
pop rcx
sub eax, ecx
pop rcx
sbb edx, ecx
'' Store the 64-bit total as two dwords, low half first.
mov DWORD PTR [counter_cycles], eax
mov DWORD PTR [counter_cycles+4], edx
end asm
'' Put the process and thread back to the priorities saved by COUNTER_BEGIN.
SetPriorityClass(GetCurrentProcess(),_process_priority_class_)
SetThreadPriority(GetCurrentThread(),_thread_priority_)
'' Convert the total into the average per iteration.
counter_cycles /= _loop_count_
#endmacro
Code: Select all
''=============================================================================
''
'' THIS IS THE FINAL REVISION FOR THE 1.02.0 WIN64 COMPILER.
''
''=============================================================================
'' These two macros, which are coded to be -gen gcc / FB 64-bit compatible,
'' provide a convenient method of measuring the processor clock-cycle count
'' for a block of code. The macros must be called in pairs, and the block
'' of code, or a call to a procedure containing the block of code, must be
'' placed between the counter_begin and counter_end macro calls. The average
'' per-loop cycle count, corrected for the loop overhead, is returned in the
'' global variable counter_cycles.
''
'' I provided access to the process priority class and the thread priority to
'' make it possible to operate at the highest possible priority by using the
'' combination of REALTIME_PRIORITY_CLASS and THREAD_PRIORITY_TIME_CRITICAL.
'' On a multi-core system (or even a P4 with HT) running Windows XP, doing so
'' appears to be reasonably safe, even if the code being timed triggers an
'' exception. But note that running such a high priority on a single-core
'' system can cause the system to hang.
''
'' The loops and the cycle-count calculations are done entirely in assembly
'' to avoid problems with compiler optimizations breaking the code.
''
'' Note that CPUID will alter the value of EBX.
''=============================================================================
'' Result of the last COUNTER_BEGIN/COUNTER_END pair. COUNTER_END writes the
'' total as two dwords at offsets 0 and 4, so this variable must be at least
'' 8 bytes wide (Integer is pointer-sized, i.e. 8 bytes, on the win64 target
'' named in the header above).
dim shared counter_cycles as integer
'' Requested iteration count, and the working counter that the asm loops
'' decrement down to zero. Both are accessed as dwords by the asm code.
dim shared _loop_count_ as long
dim shared _loop_counter_ as long
'' Priorities in effect before COUNTER_BEGIN changed them; COUNTER_END
'' restores them.
dim shared _process_priority_class_ as DWORD
dim shared _thread_priority_ as long
'' COUNTER_BEGIN( loop_count, process_priority, thread_priority )
''   loop_count       - number of times the timed code is executed; must be at
''                      least 1 because both loops decrement before testing
''   process_priority - priority class passed to SetPriorityClass
''   thread_priority  - thread priority passed to SetThreadPriority
'' Saves the current priorities, applies the requested ones, times an empty
'' loop of loop_count iterations to obtain the loop overhead, then records the
'' start count of the real run and opens the timed loop at local label 1.
'' On exit four qwords (32 bytes) are left on the stack, start count on top
'' and overhead below it; the matching COUNTER_END pops them. The asm block
'' modifies RAX, RBX, RCX and RDX.
'' The asm lines are given as string literals and switch the assembler to
'' Intel syntax for the duration of the block. The upper-case names ending in
'' a dollar sign are presumably the symbols the compiler emits for the shared
'' variables declared with this macro - confirm for the compiler version used.
'' NOTE(review): the timed code runs with those 32 bytes at the top of the
'' stack. Confirm that nothing in it addresses data relative to RSP or lets a
'' callee write into that area (e.g. Win64 shadow space).
#macro COUNTER_BEGIN( loop_count, process_priority, thread_priority )
_loop_count_ = loop_count
'' Remember the current priorities so that COUNTER_END can restore them.
_process_priority_class_ = GetPriorityClass(GetCurrentProcess())
_thread_priority_ = GetThreadPriority(GetCurrentThread())
SetPriorityClass(GetCurrentProcess(), process_priority)
SetThreadPriority(GetCurrentThread(), thread_priority)
_loop_counter_ = _loop_count_
asm
".intel_syntax noprefix"
'' Reference run: CPUID (a serializing instruction) followed by RDTSC. RDTSC
'' returns the count in EDX:EAX; each half is saved as a full qword, high
'' half first so that the low half ends up on top.
"xor eax, eax"
"cpuid"
"rdtsc"
"push rdx"
"push rax"
"xor eax, eax"
"cpuid"
'' Empty loop using the same decrement-and-branch as the timed loop.
".balign 16"
"0:"
"sub DWORD PTR [_LOOP_COUNTER_$], 1"
"jnz 0b"
'' EDX:EAX = end count - saved start count = cycles used by the empty loop.
'' Only the low dword (ECX) of each popped qword takes part in the arithmetic.
"xor eax, eax"
"cpuid"
"rdtsc"
"pop rcx"
"sub eax, ecx"
"pop rcx"
"sbb edx, ecx"
'' Leave the overhead on the stack for COUNTER_END.
"push rdx"
"push rax"
'' Timed run: save its start count on the stack as well.
"xor eax, eax"
"cpuid"
"rdtsc"
"push rdx"
"push rax"
'' Reload the working counter for the timed loop.
"mov eax, _LOOP_COUNT_$"
"mov _LOOP_COUNTER_$, eax"
"xor eax, eax"
"cpuid"
'' Top of the timed loop; COUNTER_END branches back to this label.
".balign 16"
"1:"
'' Switch the assembler back to AT&T syntax for the code that follows.
".att_syntax prefix"
end asm
#endmacro
'' COUNTER_END
'' Closes the timed loop opened by COUNTER_BEGIN, computes the elapsed count
'' minus the empty-loop overhead, restores the saved priorities and leaves the
'' average count per iteration in counter_cycles. It pops the four qwords that
'' COUNTER_BEGIN left on the stack, so the timed code must leave the stack
'' pointer where it found it. The asm block modifies RAX, RBX, RCX and RDX.
#macro COUNTER_END
asm
".intel_syntax noprefix"
'' Bottom of the timed loop: repeat until the loop counter reaches zero.
"sub DWORD PTR [_LOOP_COUNTER_$], 1"
"jnz 1b"
'' EDX:EAX = end count - start count pushed by COUNTER_BEGIN.
"xor eax, eax"
"cpuid"
"rdtsc"
"pop rcx"
"sub eax, ecx"
"pop rcx"
"sbb edx, ecx"
'' Subtract the empty-loop overhead pushed by COUNTER_BEGIN.
"pop rcx"
"sub eax, ecx"
"pop rcx"
"sbb edx, ecx"
'' Store the 64-bit total as two dwords, low half first.
"mov [COUNTER_CYCLES$], eax"
"mov [COUNTER_CYCLES$+4], edx"
'' Switch the assembler back to AT&T syntax for the code that follows.
".att_syntax prefix"
end asm
'' Put the process and thread back to the priorities saved by COUNTER_BEGIN.
SetPriorityClass(GetCurrentProcess(),_process_priority_class_)
SetThreadPriority(GetCurrentThread(),_thread_priority_)
'' Convert the total into the average per iteration.
counter_cycles /= _loop_count_
#endmacro
Code: Select all
''=============================================================================
#include "counter32.bas"
''=============================================================================
'' Test driver for the 32-bit counter macros: times one arithmetic expression
'' at three integer widths, then a 100-dword block copy, three passes each.
const TIMING_LOOPS = 1000000
const PASS_COUNT = 3
const COPY_BYTES = 100*4

'' Operands and results for the arithmetic tests, one set per integer type.
dim x as integer = 1, y as integer = 2, z as integer = 3
dim xL as long = 1, yL as long = 2, zL as long = 3
dim xLi as longint = 1, yLi as longint = 2, zLi as longint = 3

'' Source and destination buffers for the block-copy test.
dim src_buf as any ptr, dst_buf as any ptr
src_buf = allocate(COPY_BYTES)
dst_buf = allocate(COPY_BYTES)

'' Keep the process on the first logical processor, then wait five seconds
'' before the first measurement.
SetProcessAffinityMask( GetCurrentProcess(), 1 )
sleep 5000

for pass_no as integer = 1 to PASS_COUNT

    counter_begin( TIMING_LOOPS, REALTIME_PRIORITY_CLASS, THREAD_PRIORITY_TIME_CRITICAL )
        z = 0
        z = (x*x+y*y)\(x+1)
    counter_end()
    print counter_cycles; " cycles integer"

    counter_begin( TIMING_LOOPS, REALTIME_PRIORITY_CLASS, THREAD_PRIORITY_TIME_CRITICAL )
        zL = 0
        zL = (xL*xL+yL*yL)\(xL+1)
    counter_end()
    print counter_cycles; " cycles long"

    counter_begin( TIMING_LOOPS, REALTIME_PRIORITY_CLASS, THREAD_PRIORITY_TIME_CRITICAL )
        zLi = 0
        zLi = (xLi*xLi+yLi*yLi)\(xLi+1)
    counter_end()
    print counter_cycles; " cycles longint"

    print

next

for pass_no as integer = 1 to PASS_COUNT

    counter_begin( TIMING_LOOPS, REALTIME_PRIORITY_CLASS, THREAD_PRIORITY_TIME_CRITICAL )
        asm
            '' EDI and ESI are saved and restored around the copy.
            push edi
            push esi
            mov esi, [src_buf]
            mov edi, [dst_buf]
            '' 100 dwords, the full size of each buffer.
            mov ecx, 100
            rep movsd
            pop esi
            pop edi
        end asm
    counter_end()
    print counter_cycles; " cycles, rep movsd * 100"

next

'' Wait for a key press so the results stay on screen.
sleep
Code: Select all
''=============================================================================
''---------------------------------------------------------------
'' These declarations are the minimum required for this app, to
'' replace the functionality of the windows.bi that is currently
'' missing from the 64-bit version.
''---------------------------------------------------------------
'' Windows base types. DWORD and BOOL are 32-bit in the Windows API on every
'' target, so they map to the fixed-size 32-bit ULong/Long rather than to the
'' pointer-sized UInteger/Integer, which are 64-bit in a 64-bit build.
type HANDLE as any ptr
type DWORD as ulong
'' Pointer-sized unsigned value, used for the processor affinity mask.
type DWORD_PTR as uinteger
type WINBOOL as long
type BOOL as WINBOOL
#define REALTIME_PRIORITY_CLASS &h00000100
#define THREAD_PRIORITY_TIME_CRITICAL 15
extern "windows" lib "kernel32"
declare function GetCurrentProcess () as HANDLE
declare function GetCurrentThread () as HANDLE
declare function GetPriorityClass (byval as HANDLE) as DWORD
'' Thread priorities are C int values, i.e. 32-bit signed.
declare function GetThreadPriority (byval as HANDLE) as long
declare function SetPriorityClass (byval as HANDLE, byval as DWORD) as BOOL
declare function SetThreadPriority (byval as HANDLE, byval as long) as BOOL
declare function SetProcessAffinityMask (byval as HANDLE, byval as DWORD_PTR) as BOOL
end extern
''=============================================================================
#include "counter64.bas"
''=============================================================================
'' Test driver for the 64-bit counter macros: times one arithmetic expression
'' at three integer widths, then a 100-qword block copy, three passes each.
dim as integer x=1,y=2,z=3
dim as long xL=1,yL=2,zL=3
dim as longint xLi=1,yLi=2,zLi=3
'' Presumably shared so that the asm block below can refer to the pointers by
'' symbol name.
dim shared as any ptr p1, p2
'' Two 800-byte buffers, 100 qwords each, for the rep movsq test.
p1 = allocate(100*8)
p2 = allocate(100*8)
'' Keep the process on the first logical processor, then wait five seconds
'' before the first measurement.
SetProcessAffinityMask( GetCurrentProcess(), 1)
sleep 5000
for i as integer = 1 to 3
counter_begin(1000000,REALTIME_PRIORITY_CLASS,THREAD_PRIORITY_TIME_CRITICAL)
z = 0
z = (x*x+y*y)\(x+1)
counter_end
print counter_cycles;" cycles integer"
counter_begin(1000000,REALTIME_PRIORITY_CLASS,THREAD_PRIORITY_TIME_CRITICAL)
zL = 0
zL = (xL*xL+yL*yL)\(xL+1)
counter_end
print counter_cycles;" cycles long"
counter_begin(1000000,REALTIME_PRIORITY_CLASS,THREAD_PRIORITY_TIME_CRITICAL)
zLi = 0
zLi = (xLi*xLi+yLi*yLi)\(xLi+1)
counter_end
print counter_cycles;" cycles longint"
print
next
for i as integer = 1 to 3
counter_begin(1000000,REALTIME_PRIORITY_CLASS,THREAD_PRIORITY_TIME_CRITICAL)
asm
".intel_syntax noprefix"
'' RSI and RDI are callee-saved in the Microsoft x64 calling convention.
"push rdi"
"push rsi"
'' Load the full 64-bit pointers. A 32-bit load into ESI/EDI would drop the
'' upper half of any buffer address above 4 GB and make the copy touch the
'' wrong memory.
"mov rsi, P1$"
"mov rdi, P2$"
'' Writing ECX also clears the upper half of RCX, the rep count.
"mov ecx, 100"
'' test64.asm:937: Error: same type of prefix used twice for:
'' "rex64 rep movsq"
"rep movsq"
"pop rsi"
"pop rdi"
".att_syntax prefix"
end asm
counter_end
print counter_cycles;" cycles, rep movsq * 100"
next
'' Wait for a key press so the results stay on screen.
sleep
Code: Select all
5 cycles integer
5 cycles long
112 cycles longint
5 cycles integer
5 cycles long
112 cycles longint
5 cycles integer
5 cycles long
111 cycles longint
65 cycles, rep movsd * 100
64 cycles, rep movsd * 100
64 cycles, rep movsd * 100
Code: Select all
25 cycles integer
28 cycles long
25 cycles longint
25 cycles integer
28 cycles long
25 cycles longint
25 cycles integer
28 cycles long
25 cycles longint
75 cycles, rep movsq * 100
77 cycles, rep movsq * 100
76 cycles, rep movsq * 100