FB not faster for me.

DOS specific questions.
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

Hi all

Below the result.

Code: Select all

Profiling results:
------------------

Executable name: c:/portio.exe
Launched on: 08-03-2005, 09:08:12
Total program execution time: 22.03 seconds

Per function timings:

        Function:                               Time:         Total%:   Proc%:

(main)                                          22.03000      100.00%

        _fb_StrCompare                          4.10000       18.61%    18.61%
        _fb_UCASE                               2.39000       10.85%    10.85%
        _fb_ConsoleInput                        2.08000       9.44%     9.44%
        _DOSMSEC                                1.31000       5.95%     5.95%
        _fb_Inkey                               1.28000       5.81%     5.81%
        _fb_StrAssign                           1.14000       5.17%     5.17%
        _DIAGNOSTIC                             1.10000       4.99%     4.99%
        _SETPORT                                0.06000       0.27%     0.27%
        _fb_StrAllocTempDescF                   0.00000       0.00%     0.00%
        _fb_PrintString                         0.00000       0.00%     0.00%
        _fb_InputInt                            0.00000       0.00%     0.00%
        _PORTCONFIG                             0.00000       0.00%     0.00%
        _fb_Cls                                 0.00000       0.00%     0.00%
        _fb_ErrorSetHandler                     0.00000       0.00%     0.00%
        _KEYASSIGN                              0.00000       0.00%     0.00%
        _fb_Locate                              0.00000       0.00%     0.00%
        _fb_PrintInt                            0.00000       0.00%     0.00%


_BINCONVERT                                     0.55000       2.50%

        _fb_StrConcatAssign                     0.22000       1.00%     40.00%
        _fb_StrAssign                           0.00000       0.00%     0.00%


_DIAGNOSTIC                                     1.10000       4.99%

        _BINCONVERT                             0.55000       2.50%     50.00%
        _inportb                                0.11000       0.50%     10.00%
        _fb_Locate                              0.05000       0.23%     4.55%
        _fb_PrintString                         0.05000       0.23%     4.55%
        _fb_PrintInt                            0.00000       0.00%     0.00%
        _fb_StrAllocTempDescF                   0.00000       0.00%     0.00%
        _fb_Cls                                 0.00000       0.00%     0.00%
        _fb_StrDelete                           0.00000       0.00%     0.00%


_KEYASSIGN                                      0.00000       0.00%

        _fb_StrDelete                           0.00000       0.00%     0.00%
        _fb_StrAssign                           0.00000       0.00%     0.00%


_PORTCONFIG                                     0.00000       0.00%

        _outportb                               0.00000       0.00%     100.00%


_SETPORT                                        0.06000       0.27%

        _fb_PrintString                         0.06000       0.27%     100.00%
        _inportb                                0.00000       0.00%     0.00%
        _fb_StrConcat                           0.00000       0.00%     0.00%
        _fb_HEX_b                               0.00000       0.00%     0.00%
        _fb_Locate                              0.00000       0.00%     0.00%



Global timings:

(main)                                          22.03000  (100.00%)
_fb_StrCompare                                  4.10000  (18.61%)
_fb_UCASE                                       2.39000  (10.85%)
_fb_ConsoleInput                                2.08000  (9.44%)
_DOSMSEC                                        1.31000  (5.95%)
_fb_Inkey                                       1.28000  (5.81%)
_DIAGNOSTIC                                     1.10000  (4.99%)
_BINCONVERT                                     0.55000  (2.50%)
_fb_StrConcatAssign                             0.22000  (1.00%)
_inportb                                        0.11000  (0.50%)
_SETPORT                                        0.06000  (0.27%)
_fb_Locate                                      0.05000  (0.23%)
_fb_PrintString                                 0.05000  (0.23%)
_fb_StrAssign                                   0.00000  (0.00%)
_fb_PrintInt                                    0.00000  (0.00%)
_fb_StrAllocTempDescF                           0.00000  (0.00%)
_fb_InputInt                                    0.00000  (0.00%)
_outportb                                       0.00000  (0.00%)
_PORTCONFIG                                     0.00000  (0.00%)
_fb_Cls                                         0.00000  (0.00%)
_fb_ErrorSetHandler                             0.00000  (0.00%)
_KEYASSIGN                                      0.00000  (0.00%)
_fb_StrConcat                                   0.00000  (0.00%)
_fb_HEX_b                                       0.00000  (0.00%)
_fb_StrDelete                                   0.00000  (0.00%)
Looks like StrCompare is the culprit.
Will do another test with no string operations in qb or Fb

Regards
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

Hi all

This test is for a timing loop of 30 secsonly.
No string operation, no inkey$ checking.
With no -profile and no -g for debug code.
qb did 527,300 loops, fb did 155,300
Not looking good.

Code: Select all

DO	
    Loops& = Loops& + 1
    IF DosMsec& - StartTime& > 3000 THEN
        CALL DIAGNOSTIC(1)
        StartTime& = DosMsec&
        Locate 20,10:print DosMsec&,Loops&
        Loops& = 0
        exit do
    END IF
LOOP
end
This is the result

Code: Select all

Profiling results:
------------------

Executable name: c:/portio1.exe
Launched on: 08-03-2005, 09:54:41
Total program execution time:  30.6 seconds

Per function timings:

        Function:                               Time:         Total%:   Proc%:

(main)                                          30.60000      100.00%

        _DOSMSEC                                15.47000      50.56%    50.56%
        _DIAGNOSTIC                             0.49000       1.60%     1.60%
        _SETPORT                                0.06000       0.20%     0.20%
        _fb_PrintInt                            0.00000       0.00%     0.00%
        _fb_Locate                              0.00000       0.00%     0.00%
        _fb_ErrorSetHandler                     0.00000       0.00%     0.00%
        _fb_Cls                                 0.00000       0.00%     0.00%
        _KEYASSIGN                              0.00000       0.00%     0.00%


_BINCONVERT                                     0.44000       1.44%

        _fb_StrConcatAssign                     0.18000       0.59%     40.91%
        _fb_StrAssign                           0.10000       0.33%     22.73%


_DIAGNOSTIC                                     0.49000       1.60%

        _BINCONVERT                             0.44000       1.44%     89.80%
        _fb_StrDelete                           0.00000       0.00%     0.00%
        _fb_Cls                                 0.00000       0.00%     0.00%
        _fb_StrAllocTempDescF                   0.00000       0.00%     0.00%
        _fb_PrintInt                            0.00000       0.00%     0.00%
        _fb_PrintString                         0.00000       0.00%     0.00%
        _fb_Locate                              0.00000       0.00%     0.00%
        _inportb                                0.00000       0.00%     0.00%


_KEYASSIGN                                      0.00000       0.00%

        _fb_StrDelete                           0.00000       0.00%     0.00%
        _fb_StrAssign                           0.00000       0.00%     0.00%


_SETPORT                                        0.06000       0.20%

        _inportb                                0.00000       0.00%     0.00%
        _fb_Locate                              0.00000       0.00%     0.00%
        _fb_StrConcat                           0.00000       0.00%     0.00%
        _fb_HEX_b                               0.00000       0.00%     0.00%
        _fb_PrintString                         0.00000       0.00%     0.00%



Global timings:

(main)                                          30.60000  (100.00%)
_DOSMSEC                                        15.47000  (50.56%)
_DIAGNOSTIC                                     0.49000  (1.60%)
_BINCONVERT                                     0.44000  (1.44%)
_fb_StrConcatAssign                             0.18000  (0.59%)
_fb_StrAssign                                   0.10000  (0.33%)
_SETPORT                                        0.06000  (0.20%)
_fb_StrDelete                                   0.00000  (0.00%)
_fb_ErrorSetHandler                             0.00000  (0.00%)
_KEYASSIGN                                      0.00000  (0.00%)
_fb_Cls                                         0.00000  (0.00%)
_fb_StrConcat                                   0.00000  (0.00%)
_fb_HEX_b                                       0.00000  (0.00%)
_fb_StrAllocTempDescF                           0.00000  (0.00%)
_fb_PrintInt                                    0.00000  (0.00%)
_fb_PrintString                                 0.00000  (0.00%)
_fb_Locate                                      0.00000  (0.00%)
_inportb                                        0.00000  (0.00%)
If 50% of its time was in DosMsec& then where is the other 50 % ?

Will do one more test, using timer to eliminate the switching to vm.

Regards
DrV
Site Admin
Posts: 2116
Joined: May 27, 2005 18:39
Location: Midwestern USA
Contact:

Post by DrV »

Try either TIMER or this code:

Code: Select all

FUNCTION DOSMSEC& 
	dim r as __dpmi_regs
	
	r.h.ah = &H2C
	__dpmi_int(&H21, @r)
	
	'ch= hours 
	'cl= minutes 
	'dh= seconds 
	'dl= deci secs (1/100's) 
	
	return (r.h.ch * 60 * 60 * 100) + (r.h.cl * 60 * 60) + (r.h.dh * 60) + r.h.dl
end function
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

Drv

Have tried Timer, and the result is significantly worse.
I cant help but think that I am missing something, but what. ?
qb loops = 144,800, fb loops = 11,000
The following is the profile.
Why is there 20 + percent missing.?

Code: Select all

Profiling results:
------------------

Executable name: c:/portio1.exe
Launched on: 08-03-2005, 10:26:46
Total program execution time:  3.24 seconds

Per function timings:

        Function:                               Time:         Total%:   Proc%:

(main)                                          3.24000       100.00%

        _fb_Timer                               1.76000       54.32%    54.32%
        _DIAGNOSTIC                             0.55000       16.98%    16.98%
        _SETPORT                                0.06000       1.85%     1.85%
        _fb_PrintInt                            0.00000       0.00%     0.00%
        _fb_Locate                              0.00000       0.00%     0.00%
        _fb_ErrorSetHandler                     0.00000       0.00%     0.00%
        _fb_Cls                                 0.00000       0.00%     0.00%
        _KEYASSIGN                              0.00000       0.00%     0.00%


_BINCONVERT                                     0.38000       11.73%

        _fb_StrConcatAssign                     0.10000       3.09%     26.32%
        _fb_StrAssign                           0.00000       0.00%     0.00%


_DIAGNOSTIC                                     0.55000       16.98%

        _BINCONVERT                             0.38000       11.73%    69.09%
        _fb_StrDelete                           0.00000       0.00%     0.00%
        _fb_Cls                                 0.00000       0.00%     0.00%
        _fb_StrAllocTempDescF                   0.00000       0.00%     0.00%
        _fb_PrintInt                            0.00000       0.00%     0.00%
        _fb_PrintString                         0.00000       0.00%     0.00%
        _fb_Locate                              0.00000       0.00%     0.00%
        _inportb                                0.00000       0.00%     0.00%


_KEYASSIGN                                      0.00000       0.00%

        _fb_StrDelete                           0.00000       0.00%     0.00%
        _fb_StrAssign                           0.00000       0.00%     0.00%


_SETPORT                                        0.06000       1.85%

        _inportb                                0.00000       0.00%     0.00%
        _fb_HEX_b                               0.00000       0.00%     0.00%
        _fb_Locate                              0.00000       0.00%     0.00%
        _fb_StrConcat                           0.00000       0.00%     0.00%
        _fb_PrintString                         0.00000       0.00%     0.00%



Global timings:

(main)                                          3.24000  (100.00%)
_fb_Timer                                       1.76000  (54.32%)
_DIAGNOSTIC                                     0.55000  (16.98%)
_BINCONVERT                                     0.38000  (11.73%)
_fb_StrConcatAssign                             0.10000  (3.09%)
_SETPORT                                        0.06000  (1.85%)
_fb_StrDelete                                   0.00000  (0.00%)
_fb_ErrorSetHandler                             0.00000  (0.00%)
_fb_Cls                                         0.00000  (0.00%)
_KEYASSIGN                                      0.00000  (0.00%)
_fb_HEX_b                                       0.00000  (0.00%)
_fb_StrConcat                                   0.00000  (0.00%)
_fb_StrAllocTempDescF                           0.00000  (0.00%)
_fb_PrintInt                                    0.00000  (0.00%)
_fb_PrintString                                 0.00000  (0.00%)
_fb_Locate                                      0.00000  (0.00%)
_fb_StrAssign                                   0.00000  (0.00%)
_inportb                                        0.00000  (0.00%)
Will try your code.
Regards
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

Code: Select all

ASMRoutines.bas(6) : error 14: Expected identifier, found: '__dpmi_regs'

   dim r as __dpmi_regs 
         ^
Any other libraries needed. ?
mjs
Site Admin
Posts: 842
Joined: Jun 16, 2005 19:50
Location: Germany

Post by mjs »

The other 50% are used by the loop and the IF statement itself. Hmm ... the produced assembler code isn't optimal but it's not too bad either. I simply have no clue why the application performance is worse than the QB counterpart.

The only problem I can think of is that the interrupt call is very expensive (much more expensive than in 16 bit DOS). Maybe this is the real bottleneck because using INT directly causes that the DPMI server interrupt handler gets called. He recognizes that you want to call a DOS interrupt and redirects it to the DOS interrupt. This is pure overhead.

Please try the following implementation:

Code: Select all

#include "dos\dpmi.bi"

FUNCTION DOSMSEC&
    DIM r AS __dpmi_regs
    r.h.ah = &H2c
    __dpmi_simulate_real_mode_interrupt &H21, @r
    FUNCTION = r.h.dl * 10 + r.h.dh * 1000 + r.h.cl * 60000 + r.h.ch * 3600000
END FUNCTION
EDIT: As you can see, I use __dpmi_simulate_real_mode_interrupt instead of __dpmi_int because __dpmi_int also sets SS:SP which might cause unpredictable results when SS and SP aren't set correctly (might overwrite HMA).

Regards,
Mark
Last edited by mjs on Aug 03, 2005 0:22, edited 1 time in total.
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

sorry forgot dpmi.bi
mjs
Site Admin
Posts: 842
Joined: Jun 16, 2005 19:50
Location: Germany

Post by mjs »

This is the fastest solution I've found ad hoc:

Code: Select all

FUNCTION DOSMSEC AS LONG
asm
   PUSH    ECX
   MOV     AH,&H2C
   INT     &H21
   PUSH    EDX
   MOV     EBX, 10
   MOVZX   EAX, DL
   MUL     EBX
   XCHG    [ESP], EAX
   MOVZX   EAX, AH
   MOV     BX, 1000
   MUL     EBX
   ADD     [ESP], EAX
   MOVZX   EAX, CL
   MOV     BX, 60000
   MUL     EBX
   ADD     [ESP], EAX
   MOV     EBX, 3600000
   MOVZX   EAX, CH
   MUL     EBX
   POP     ECX
   ADD     ECX, EAX
   MOV     [FUNCTION], ECX
   POP     ECX
end asm
END FUNCTION
It's quite strange but this method is faster than calling the __dpmi_simulate_real_mode_interrupt function - at least in a Win32 console. I don't know how fast it is on a real DOS.

BTW: It seems that this function works. The code you posted gave some strange results ... and the three seconds it should wait for were definately much longer ...

EDIT: I can only guess that the problem is the DPMI server overhead and I can only see the following solutions:
  1. Write your own protected mode interrupt handler for the timer interrupt that stores the time in a variable that can be accessed directly from your application.
  2. Use another DPMI server. You might try GO32, CWSDPMI, WDOSX, DOS32, the EMM386 DPMI implementation from DR-DOS, etc...
Regards,
Mark
Last edited by mjs on Aug 03, 2005 0:21, edited 1 time in total.
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

Drv and mjs
Thanks for your efforts.

The code posted had an error.

Code: Select all

return (r.h.ch * 60 * 60 * 100) + (r.h.cl * 60 * 60) + (r.h.dh * 60) + r.h.dl 
it should be (i used function, but ignore that)

Code: Select all

FUNCTION = (r.h.ch * 60 * 60 * 100) + (r.h.cl * 60 * 100) + (r.h.dh * 100) + r.h.dl 
secs x 100 to get deci-secs in each bracket set.

When I ran that, each one terminated after about 302. (3 secs)
qb did 526,000 loops
fb did 133,000 loops

mentally counting, each was about 3 secs correctly.

mjs will try your example next.
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

well, you were right. It is faster, but still way behind qb in good old 16 bit.
qb did about 526,000 loops
fb did about 162,000 loops.

By the way, some of the confusion is in terminology.
I called the routine DosMsec, but I am in fact checking for 1/100 sec which is the best you get from this interupt.

The difference in performance is to great to ignore.
OK perhaps this difference is only in calling interupts.
You recall in an earlier reply I said
My reading of the info suggests that the interupts are available, but only under dos. Also recall that the DPMI interface switches back to vm for certain interupts
well maybe you guys can give me a speed test, that uses interupts, but not one that goes back to vm. ?

Edit:
Using cwsdpmi gives 148,000 loops.
Note that it unloads after the application terminates, which means Win95 dpmi is current again, and it went back to about 162,000 loops.
So Win95 dpmi is faster.
DrV
Site Admin
Posts: 2116
Joined: May 27, 2005 18:39
Location: Midwestern USA
Contact:

Post by DrV »

As I understand it, all interrupts are reflected to vm86... is there perhaps another way to do what you would like to do without using interrupts? I could show you a memory-access test where FB would absolutely blow away any 16-bit compiler. :)
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

Well the Dos tick count is in fact stored at some address, so every time it changes I need to know it. Currently I hook the interupt.
If it possible to Hook interupts in FB, then there should not be a vm86 switch as I am not calling the interupt. When the hardware int occurrs it simply has to update my variable.

Memory-access is the important one as well, as currently a lot of calculation is being done using "unreal" flat memory in asm.

Now knowing that int are so slow in fb, I will have to try hooking it.

Will update you guys when I have tested that.

Regards
v1ctor
Site Admin
Posts: 3804
Joined: May 27, 2005 8:08
Location: SP / Bra[s]il
Contact:

Post by v1ctor »

The problem is not FB, doing a benchmark when a interrupt is called one every interaction will certainly be slower in protected-mode DOS when compared to real-mode DOS. The DPMI server will have to do 2 context switches, going to real-mode and coming back, what will eat hundred of clocks in raw-DOS and thousands of clocks if emm386 is loaded or if inside a Windows' DOS box.

Code: Select all

 '' FBtest.bas

#include "dos/dpmi.bi"
#include "dos/go32.bi"
#include "dos/sys/farptr.bi"

#define mytimer() _farpeekl(_dos_ds, &h46c)

const SECONDS = 5


	print time$
	endt = mytimer() + cint(18.2 * SECONDS)
	cnt = 0
	do until( mytimer() >= endt )
		cnt += 1
	loop
	print time$
	
	print cnt

Code: Select all

'' QBtest.bas

const SECONDS = 5

function mytimer&

	mytimer& = peek(&h46c+0) + (peek(&h46c+1) * 256&) + (peek(&h46c+2) * 65536&) + (peek(&h46c+3) * 16777216&)

end function

	def seg = 0
	
	print time$
	endt& = mytimer& + cint(18.2 * SECONDS)
	cnt& = 0
	do until( mytimer& >= endt& )
		cnt& = cnt& + 1
	loop
	print time$
	
	print cnt&
Results here:

Code: Select all

C:\prg\code\bas\FREEBA~2>QBtest
03:30:31
03:30:36
 11387371

Code: Select all

C:\prg\code\bas\FREEBA~2>FBtest
03:30:41
03:30:46
 412184626
The FB loop compiled by fbc for DOS executes 36 times faster than in QB, due the bad code generated by QB to handle longints, and also because the lack of pointers or a PEEKL or such to be used in the mytimer routine - using doubles and TIMER wouldn't help, the result was actually 5 times slower in QB.
Dinosaur
Posts: 1481
Joined: Jul 24, 2005 1:13
Location: Hervey Bay (.au)

Post by Dinosaur »

Hi all

from DrV
I could show you a memory-access test where FB would absolutely blow away any 16-bit compiler. :)
v1ctor, you didnt need to prove it. :)
Running your test on my test rig,
qb=3,845,729
fb=35,422,971
10 times faster.

Analyses of my major projects show that I hook the following interupts.
Tick Count, updated every 1/18 sec by a hook.
KeyBoard, normally a couple of times per day.
TouchScreen, normally a couple of times per day.
A2d, 2000 times per second.

When I optimized my qb code for these, there was a significant improvement in speed, compared to polling.
To implement that in FB, I need to perfect getting the interupts serviced in protected mode. (We have proven that the switching to v86 mode is the culprit)
I have googled extensively for asm examples of console int hooks, but little success.
Although I wrote the real mode myself, it appears that the protected mode is a lot more involved.

Regards
DrV
Site Admin
Posts: 2116
Joined: May 27, 2005 18:39
Location: Midwestern USA
Contact:

Post by DrV »

Hooking interrupts is rather involved under 32-bit DOS/DPMI, though certainly possible. Be careful when writing interrupt handlers; you must lock all the memory that is accessed by the handler, including code sections, which requires knowing the size of each procedure. Also, a wrapper that sets up a stack and executes an IRET on exit should be used. Example of keyboard hook:

Code: Select all

#include "dos/go32.bi"
#include "dos/dpmi.bi"
#include "dos/pc.bi"

dim shared kb_data(127) as integer
dim shared old_kb_int as _go32_dpmi_seginfo, new_kb_int as _go32_dpmi_seginfo
private sub kb_hook()
	'' handle kb int here, as in real-mode
	dim scan as ubyte
	dim status as ubyte
	dim ascii as ushort
	
	' read the raw scan code from the keyboard
	scan = inportb(&H60)           ' read scan code
	status = inportb(&H61)         ' read keyboard status
	outportb(&H61, status Or &H80)  ' set bit 7 and write
	outportb(&H61, status)         ' write again, bit 7 clear
	outportb(&H20, &H20)           ' reset PIC
	if (scan and &H80) then
		kb_data(scan and not &H80) = 0
	else
		kb_data(scan) = 1
	end if
end sub
private sub end_kb_hook()
end sub

private sub lock_mem(byval address as uinteger, byval size as uinteger)
	dim mi as __dpmi_meminfo
	mi.address = address
	mi.size = size
	__dpmi_lock_linear_region(@mi)
end sub

private sub unlock_mem(byval address as uinteger, byval size as uinteger)
	dim mi as __dpmi_meminfo
	mi.address = address
	mi.size = size
	__dpmi_unlock_linear_region(@mi)
end sub

private sub init_kb_hook()
	_go32_dpmi_get_protected_mode_interrupt_vector(&H9, @old_kb_int)
	new_kb_int.pm_offset = @kb_handler
	new_kb_int.pm_selector = _go32_my_cs()
	_go32_dpmi_allocate_iret_wrapper(@new_kb_int)
	_go32_dpmi_set_protected_mode_interrupt_vector(&H9, @new_kb_int)
	lock_mem(@kb_data(0), ubound(kb_data) * sizeof(kb_data(0)))
	lock_mem(@kb_hook, @end_kb_hook - @kb_hook)
end sub

private sub term_kb_hook()
' put things back the way they were
	_go32_dpmi_set_protected_mode_interrupt_vector(&H9, @old_kb_int)
	unlock_mem(@kb_data(0), ubound(kb_data) * sizeof(kb_data(0)))
	unlock_mem(@kb_hook, @end_kb_hook - @kb_hook)
end sub

'''''''

init_kb_hook

do
loop until kb_data(1) ' wait for esc key

term_kb_hook
Post Reply