substrings

General FreeBASIC programming questions.
srvaldez
Posts: 3374
Joined: Sep 25, 2005 21:54

Re: substrings

Post by srvaldez »

@Vortex
your latest version's times are the same as those of dodicat's substring2
nicely done :-)
Vortex
Posts: 118
Joined: Sep 19, 2005 9:50

Re: substrings

Post by Vortex »

Hi srvaldez,

Could you repeat the test with the following modifications?

Code: Select all

' Proposed change to the benchmark: use a longer test string ...
MyStr="This is a very very very very very very very very very long test string."

' ... and call both routines with the same two numeric arguments (10 and 65).
ans= SubString(MyStr,10,65)
ans= SubString2(MyStr,10,65)
srvaldez
Posts: 3374
Joined: Sep 25, 2005 21:54

Re: substrings

Post by srvaldez »

Hi Vortex, here are the times using your modifications
note: PTR time = SubString2

Code: Select all

32-bit -gen gcc -O 2
total ASM time  2.362902699985725
total PTR time  2.842502399996818
64-bit -gen gcc -O 2
total ASM time  1.750834400038002
total PTR time  2.306091299979016
for convenience, here's the test code, in case others want to try

Code: Select all

'function substring2 by dodicat
'https://www.freebasic.net/forum/viewtopic.php?p=268485#p268485

' substring2: returns the characters of _in from zero-based index x through
' zero-based index y (both inclusive), i.e. the same range the ASM SubString
' routine in this listing extracts.
'
'   _in : source string
'   x   : index of the first character to return
'   y   : index of the last character to return (clamped to the end of _in)
'
' Returns "" when the requested range is empty or starts outside the string.
' The result is assembled in a static 5000-byte buffer, so at most 4999
' characters are returned.
function substring2(_in as string, x as long,y as long) as string
    ' byte-wise copy of size bytes from src to dest
    #macro memcopy(dest,src,size)
    For n As Long=0 To (size)-1
        (dest)[n]=(src)[n]
    Next
    #endmacro
    static as zstring * 5000 g

    ' never read past the last character of the source string
    dim as long last = y
    if last > len(_in) - 1 then last = len(_in) - 1
    if x < 0 orelse x > last then return ""

    ' inclusive range -> number of bytes, limited to the buffer capacity
    dim as long count = last - x + 1
    if count > sizeof(g) - 1 then count = sizeof(g) - 1

    memcopy(cast(ubyte ptr,@g),Cast(Ubyte Ptr,Strptr(_in)) + x, count)
    ' the buffer is reused between calls, so terminate explicitly
    cast(ubyte ptr,@g)[count] = 0
    return g
end function

#if sizeof(integer)=4  '32 bits

'by Vortex
'https://www.freebasic.net/forum/viewtopic.php?p=268605#p268605
' SubString (32-bit build): returns the bytes of s from zero-based index
' StrStart through zero-based index StrEnd (inclusive).
'
' Technique: instead of copying bytes itself, the routine temporarily writes a
' 0 byte at position StrEnd+1 of the source buffer, lets "Function=*z" copy the
' now zero-terminated text starting at StrStart, and then restores the byte.
'
' NOTE(review): no bounds checking is done on StrStart / StrEnd.
' NOTE(review): the source string is modified for the duration of the call.
' NOTE(review): the second Asm block relies on esi and bl still holding the
'               values set in the first Asm block - confirm this holds for the
'               backend / optimisation level in use.
Function SubString stdcall alias "SubString" ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String

' pointer to the first byte of the requested substring
Dim As Zstring Ptr z
   
    Asm
        ' ecx = value of parameter s; esi = dword stored at that address
        ' (presumably the character-data pointer of the string descriptor)
        mov  ecx,DWORD PTR [s]
        mov  esi,DWORD PTR [ecx]
        mov  edi,esi

        ' esi -> byte just after the last requested character
        add  esi,DWORD PTR [StrEnd]
        inc  esi
       
        ' save that byte in bl and overwrite it with a terminator
        mov  bl,BYTE PTR [esi]
        mov  Byte PTR [esi],0
       
        ' z = data pointer + StrStart
        add  edi,DWORD PTR [StrStart]
        mov  DWORD PTR [z],edi
   
    End Asm
   
    ' copy the zero-terminated text at z into the function result
    Function=*z
   
    Asm
   
        ' put the saved byte back so the source string is unchanged
        mov BYTE PTR [esi],bl

    End Asm
   
   
End Function
#elseif sizeof(integer)=8  '64 bits

'by Vortex
'https://www.freebasic.net/forum/viewtopic.php?p=268605#p268605
' SubString (64-bit build): returns the bytes of s from zero-based index
' StrStart through zero-based index StrEnd (inclusive).
'
' Technique: instead of copying bytes itself, the routine temporarily writes a
' 0 byte at position StrEnd+1 of the source buffer, lets "Function=*z" copy the
' now zero-terminated text starting at StrStart, and then restores the byte.
'
' NOTE(review): no bounds checking is done on StrStart / StrEnd.
' NOTE(review): the source string is modified for the duration of the call.
' NOTE(review): the second Asm block relies on rsi and bl still holding the
'               values set in the first Asm block - confirm this holds for the
'               backend / optimisation level in use.
Function SubString alias "SubString" ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String

' pointer to the first byte of the requested substring
Dim As Zstring Ptr z
   
    Asm
        ' rcx = value of parameter s; rsi = qword stored at that address
        ' (presumably the character-data pointer of the string descriptor)
        mov  rcx,QWORD PTR [s]
        mov  rsi,QWORD PTR [rcx]
        mov  rdi,rsi

        ' rsi -> byte just after the last requested character
        add  rsi,QWORD PTR [StrEnd]
        inc  rsi
       
        ' save that byte in bl and overwrite it with a terminator
        mov  bl,BYTE PTR [rsi]
        mov  BYTE PTR [rsi],0
       
        ' z = data pointer + StrStart
        add  rdi,QWORD PTR [StrStart]
        mov  QWORD PTR [z],rdi
   
    End Asm
   
    ' copy the zero-terminated text at z into the function result
    Function=*z
   
    Asm
   
        ' put the saved byte back so the source string is unchanged
        mov BYTE PTR [rsi],bl

    End Asm
   
   
End Function
#endif

'test code by dodicat

' Benchmark driver (test code by dodicat).
' Runs five rounds; each round times SubString ("ASM") and SubString2
' ("Pointer") over the same number of calls and accumulates the totals.
Dim As String source, result

source = "This is a very very very very very very very very very long test string."

'source = source + source

Dim As Double started, elapsed, asmTotal, ptrTotal
Dim As Long lastIdx = Len(source) - 7
Dim As Long iterations = 10000000 \ 2

' warm-up loop before any timing is taken
For i As Long = 1 To iterations
    Rnd
Next i

For round As Long = 1 To 5

    ' --- inline-assembly version ---
    started = Timer
    For i As Long = 1 To iterations
        result = SubString(source, 10, lastIdx)
    Next i
    elapsed = Timer - started
    asmTotal += elapsed
    Print elapsed, "'"; result; "'", "ASM"

    ' show the source string after the ASM routine has run on it
    Print source

    ' --- pointer-copy version ---
    started = Timer
    For i As Long = 1 To iterations
        result = SubString2(source, 10, lastIdx)
    Next i
    elapsed = Timer - started
    ptrTotal += elapsed
    Print elapsed, "'"; result; "'", "Pointer"
    Print
Next round

Print
Print "total ASM time "; asmTotal
Print "total PTR time "; ptrTotal
Sleep
dodicat
Posts: 7979
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: substrings

Post by dodicat »

If this is a race then I'll pull out all the stops.

Code: Select all

 'function substring2 by dodicat
'https://www.freebasic.net/forum/viewtopic.php?p=268485#p268485

#include "crt.bi"
' substring2: returns the characters of _in from zero-based index x through
' zero-based index y (both inclusive) as a reference to an internal buffer.
'
'   _in : source string
'   x   : index of the first character to return
'   y   : index of the last character to return (clamped to the end of _in)
'
' Returns an empty string when the range is empty or starts outside _in.
' The result lives in a static 5000-byte buffer: it is overwritten by the next
' call and holds at most 4999 characters.
function substring2(_in as string, x as long,y as long) byref as const zstring
    static as zstring * 5000 g=""

    ' never read past the last character of the source string
    dim as long last = y
    if last > len(_in) - 1 then last = len(_in) - 1

    ' inclusive range -> number of bytes (0 for an empty / invalid range),
    ' limited to the buffer capacity
    dim as long count = 0
    if x >= 0 andalso x <= last then count = last - x + 1
    if count > sizeof(g) - 1 then count = sizeof(g) - 1

    if count > 0 then
        memcpy(cast(ubyte ptr,@g),Cast(Ubyte Ptr,Strptr(_in)) + x, count)
    end if
    ' the buffer keeps the previous result, so terminate explicitly
    cast(ubyte ptr,@g)[count] = 0
    return g
end function

#if sizeof(integer)=4  '32 bits

'by Vortex
'https://www.freebasic.net/forum/viewtopic.php?p=268605#p268605
' SubString (32-bit build): returns the bytes of s from zero-based index
' StrStart through zero-based index StrEnd (inclusive).
'
' Technique: a 0 byte is temporarily written at position StrEnd+1 of the
' source buffer, "Function=*z" copies the zero-terminated text that starts at
' StrStart, and the overwritten byte is then restored.
'
' NOTE(review): no bounds checking is done on StrStart / StrEnd.
' NOTE(review): the source string is modified for the duration of the call.
' NOTE(review): the second Asm block relies on esi and bl still holding the
'               values set in the first Asm block - confirm this holds for the
'               backend / optimisation level in use.
Function SubString stdcall alias "SubString" ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String

' pointer to the first byte of the requested substring
Dim As Zstring Ptr z
   
    Asm
        ' ecx = value of parameter s; esi = dword stored at that address
        ' (presumably the character-data pointer of the string descriptor)
        mov  ecx,DWORD PTR [s]
        mov  esi,DWORD PTR [ecx]
        mov  edi,esi

        ' esi -> byte just after the last requested character
        add  esi,DWORD PTR [StrEnd]
        inc  esi
       
        ' save that byte in bl and overwrite it with a terminator
        mov  bl,BYTE PTR [esi]
        mov  Byte PTR [esi],0
       
        ' z = data pointer + StrStart
        add  edi,DWORD PTR [StrStart]
        mov  DWORD PTR [z],edi
   
    End Asm
   
    ' copy the zero-terminated text at z into the function result
    Function=*z
   
    Asm
   
        ' put the saved byte back so the source string is unchanged
        mov BYTE PTR [esi],bl

    End Asm
   
   
End Function
#elseif sizeof(integer)=8  '64 bits

'by Vortex
'https://www.freebasic.net/forum/viewtopic.php?p=268605#p268605
' SubString (64-bit build): returns the bytes of s from zero-based index
' StrStart through zero-based index StrEnd (inclusive).
'
' Technique: a 0 byte is temporarily written at position StrEnd+1 of the
' source buffer, "Function=*z" copies the zero-terminated text that starts at
' StrStart, and the overwritten byte is then restored.
'
' NOTE(review): no bounds checking is done on StrStart / StrEnd.
' NOTE(review): the source string is modified for the duration of the call.
' NOTE(review): the second Asm block relies on rsi and bl still holding the
'               values set in the first Asm block - confirm this holds for the
'               backend / optimisation level in use.
Function SubString alias "SubString" ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String

' pointer to the first byte of the requested substring
Dim As Zstring Ptr z
   
    Asm
        ' rcx = value of parameter s; rsi = qword stored at that address
        ' (presumably the character-data pointer of the string descriptor)
        mov  rcx,QWORD PTR [s]
        mov  rsi,QWORD PTR [rcx]
        mov  rdi,rsi

        ' rsi -> byte just after the last requested character
        add  rsi,QWORD PTR [StrEnd]
        inc  rsi
       
        ' save that byte in bl and overwrite it with a terminator
        mov  bl,BYTE PTR [rsi]
        mov  BYTE PTR [rsi],0
       
        ' z = data pointer + StrStart
        add  rdi,QWORD PTR [StrStart]
        mov  QWORD PTR [z],rdi
   
    End Asm
   
    ' copy the zero-terminated text at z into the function result
    Function=*z
   
    Asm
   
        ' put the saved byte back so the source string is unchanged
        mov BYTE PTR [rsi],bl

    End Asm
   
   
End Function
#endif

'test code by dodicat

' Benchmark driver (test code by dodicat).
' Five rounds; every round times the ASM SubString and the pointer-based
' SubString2 over the same call count and adds the elapsed time to a total.
Dim As String source, result

source = "This is a very very very very very very very very very long test string."

'source = source + source

Dim As Double started, elapsed, asmTotal, ptrTotal
Dim As Long lastIdx = Len(source) - 7
Dim As Long iterations = 10000000 \ 2

' warm-up loop before any timing is taken
For i As Long = 1 To iterations
    Rnd
Next i

For round As Long = 1 To 5

    ' --- inline-assembly version ---
    started = Timer
    For i As Long = 1 To iterations
        result = SubString(source, 10, lastIdx)
    Next i
    elapsed = Timer - started
    asmTotal += elapsed
    Print elapsed, "'"; result; "'", "ASM"

    ' show the source string after the ASM routine has run on it
    Print source

    ' --- pointer-copy version ---
    started = Timer
    For i As Long = 1 To iterations
        result = SubString2(source, 10, lastIdx)
    Next i
    elapsed = Timer - started
    ptrTotal += elapsed
    Print elapsed, "'"; result; "'", "Pointer"
    Print
Next round

Print
Print "total ASM time "; asmTotal
Print "total PTR time "; ptrTotal
Sleep
Last edited by dodicat on Feb 09, 2020 13:35, edited 1 time in total.
jj2007
Posts: 2326
Joined: Oct 23, 2016 15:28
Location: Roma, Italia
Contact:

Re: substrings

Post by jj2007 »

Very fast indeed, dodicat, but:

Code: Select all

push 0                           ; ┌Arg5 = 0
push 0                           ; │Arg4 = 0
push dword ptr [ebp-10]          ; │Arg3 => [LOCAL.4]
push -1                          ; │Arg2 = -1
lea eax, [ebp-0C]                ; │
push eax                         ; │Arg1 => offset LOCAL.3
call 00401C80                    ; └TmpFb.00401C80
... so it continues at 00401C80:

Code: Select all

sub esp, 2C                      ; TmpFb.00401C80(guessed Arg1,Arg2,Arg3,Arg4,Arg5)
mov eax, [esp+40]
mov dword ptr [esp+14], 0        ; ┌Arg6 => 0
mov [esp+10], eax                ; │Arg5 => [Arg5]
mov eax, [esp+3C]                ; │
mov [esp+0C], eax                ; │Arg4 => [Arg4]
mov eax, [esp+38]                ; │
mov [esp+8], eax                 ; │Arg3 => [Arg3]
mov eax, [esp+34]                ; │
mov [esp+4], eax                 ; │Arg2 => [Arg2]
mov eax, [esp+30]                ; │
mov [esp], eax                   ; │Arg1 => [Arg1]
call 00401A50                    ; └TmpFb.00401A50
sub esp, 18
add esp, 2C
retn 14
... and 00401A50 is this one:

Code: Select all

push ebp                         ; TmpFb.00401A50(guessed Arg1,Arg2,Arg3,Arg4,Arg5,Arg6)
push edi
push esi
push ebx
sub esp, 2C
mov ebx, [esp+40]
mov esi, [esp+48]
test ebx, ebx
jz short 00401AC0
test esi, esi
jz short 00401AE0
cmp dword ptr [esp+4C], -1
je 00401B84
mov [esp], esi                   ; ┌string => [Arg3]
call <jmp.&msvcrt.strlen>        ; └MSVCRT.strlen
mov ecx, esi
mov edi, eax
cmp dword ptr [esp+44], -1
je 00401B20
test edi, edi
jz short 00401AEB
mov edx, [esp+44]
mov ebp, edi
test edx, edx
jnz 00401B94
mov [esp+8], ebp                 ; ┌Arg3
mov [esp+4], ecx                 ; │Arg2
mov [esp], ebx                   ; │Arg1 => [Arg1]
call 00403100                    ; └TmpFb.00403100
sub esp, 0C
mov eax, [esp+50]
test eax, eax
jnz short 00401AFC
lea esi, [esi]
lea edi, [edi]
cmp dword ptr [esp+4C], -1
je 00401BD1
add esp, 2C
mov eax, ebx
pop ebx
pop esi
pop edi
pop ebp
retn 18
Now, just in case you are wondering what this plethora of disassembly lines means: Function=*z ;-)
srvaldez
Posts: 3374
Joined: Sep 25, 2005 21:54

Re: substrings

Post by srvaldez »

well done dodicat :-)
nothing wrong with a little contest every now and then. :-)

Code: Select all

32-bit -gen gcc -O 2
total ASM time  2.334638500018031
total PTR time  0.7207269000392387
64-bit -gen gcc -O 2
total ASM time  1.744253500044579
total PTR time  0.3828349999967031
jj2007
Posts: 2326
Joined: Oct 23, 2016 15:28
Location: Roma, Italia
Contact:

Re: substrings

Post by jj2007 »

Try this instead:

Code: Select all

' Shared destination buffer for SubStringJ: 8000 zero-initialised bytes,
' allocated once at program start and reused by every call.
Dim shared as zstring ptr retStr
retStr = CAllocate(8000)         ' pointer to destination

' SubStringJ: naked stdcall routine that copies bytes into the shared retStr
' buffer with "rep movsb", appends a 0 byte and returns the buffer address in
' eax. 32-bit x86 only (uses esp-relative argument access and 32-bit registers).
'
' NOTE(review): later in this thread the author states that FB passes a
'               reference to the string pointer, so the value loaded from
'               [esp+12] needs one more dereference (mov esi, [esi]) before
'               StrStart is added.
' NOTE(review): the third argument (StrEnd) is used directly as the byte count,
'               not as an end index.
' NOTE(review): eax returns the raw buffer address - confirm this matches what
'               FB expects from a function declared "As String".
Function SubStringJ naked stdcall ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String
  Asm	' SubStringJ(MyStr,10,ln)
  ' save the registers used by rep movsb / stosb
  push esi
  push edi
  ' after the two pushes: [esp+8] = return address, [esp+12] = first argument
  mov esi, [esp+12]	' s
  add esi, [esp+16]	' StrStart
  mov ecx, [esp+20]	' StrEnd (used as the byte count for rep movsb)
  mov edi, [retStr]
  ' keep the buffer start on the stack; it becomes the return value
  push edi
  rep movsb
  ' append the terminating 0 byte
  xor eax, eax
  stosb
  ' eax = start of the destination buffer
  pop eax
  pop edi
  pop esi
  ' stdcall: remove the three 4-byte arguments
  ret 3*4
  End Asm
End Function
srvaldez
Posts: 3374
Joined: Sep 25, 2005 21:54

Re: substrings

Post by srvaldez »

JJ
It looks to me like you are cheating: you are allocating a fixed amount of memory in a global variable, so this is not a clean, self-contained function,
and it only works with the 32-bit gas backend
jj2007
Posts: 2326
Joined: Oct 23, 2016 15:28
Location: Roma, Italia
Contact:

Re: substrings

Post by jj2007 »

Yes, I am cheating. But what dodicat posted is not a clean "assembly" version, either, because there are several other functions involved. My function just makes a memcopy, and therefore the timing is roughly the same as for the built-in FB function.
dodicat
Posts: 7979
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: substrings

Post by dodicat »

Hi jj2007
I get an empty string return.

Code: Select all

 ' Reproduction of the reported problem: SubStringJ as posted, followed by a
 ' minimal driver. Per the accompanying post this prints an empty string.
 ' Shared destination buffer: 8000 zero-initialised bytes reused by every call.
 Dim shared as zstring ptr retStr
retStr = CAllocate(8000)         ' pointer to destination

' SubStringJ: naked stdcall routine that copies bytes into the shared retStr
' buffer with "rep movsb", appends a 0 byte and returns the buffer address in
' eax. 32-bit x86 only.
'
' NOTE(review): the author's follow-up states that the value loaded from
'               [esp+12] needs one more dereference (mov esi, [esi]) before
'               StrStart is added; that instruction is absent here.
' NOTE(review): the third argument (StrEnd) is used directly as the byte count.
Function SubStringJ naked stdcall ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String
  Asm   ' SubStringJ(MyStr,10,ln)
  ' save the registers used by rep movsb / stosb
  push esi
  push edi
  ' after the two pushes: [esp+8] = return address, [esp+12] = first argument
  mov esi, [esp+12]   ' s
  add esi, [esp+16]   ' StrStart
  mov ecx, [esp+20]   ' StrEnd (used as the byte count for rep movsb)
  mov edi, [retStr]
  ' keep the buffer start on the stack; it becomes the return value
  push edi
  rep movsb
  ' append the terminating 0 byte
  xor eax, eax
  stosb
  ' eax = start of the destination buffer
  pop eax
  pop edi
  pop esi
  ' stdcall: remove the three 4-byte arguments
  ret 3*4
  End Asm
End Function

' Minimal driver: same test string and arguments as the benchmark.
Dim As String MyStr,ans

MyStr="This is a very very very very very very very very very long test string."
' third argument: string length minus 7
dim as long ln=len(MyStr)-7

ans=SubStringj(MyStr,10,ln)
' result is printed between single quotes so an empty string is visible
print "'";ans;"'"
sleep 
Win 10
gas and gcc
jj2007
Posts: 2326
Joined: Oct 23, 2016 15:28
Location: Roma, Italia
Contact:

Re: substrings

Post by jj2007 »

Yes, there is a problem: FB doesn't pass a pointer to the string but rather a reference to a pointer. Add mov esi, [esi] directly after mov esi, [esp+12] (i.e. before StrStart is added).
jimg
Posts: 24
Joined: Jan 16, 2020 19:43
Location: Oregon

Re: substrings

Post by jimg »

This is all very interesting, but do you think there is any chance of this notation making it into FreeBasic?
dodicat
Posts: 7979
Joined: Jan 10, 2006 20:30
Location: Scotland

Re: substrings

Post by dodicat »

In your first post
..
a$(c:d) rather than mid$(a$,c+1,d-c+1)
..
So you can just make a macro to save writing it out, and mid is very fast, and mid can be a function or a statement, thus it is versatile.

Code: Select all




' md(s,c,d): the part of s from zero-based index c through zero-based index d
' (inclusive), expressed through Mid. Usable both as a function and as a
' statement (assignment target). The arguments are parenthesised so that
' expressions such as md(s, i+1, j) expand to the intended start and length.
#define md(s,c,d) mid(s,(c)+1,(d)-(c)+1)

' Demo of md(): first as a function, then as a statement.
dim letters as string
letters = "abcdefghijklmnopqrstuvwxyz"

' function form: extract the two characters at indices 7..8 and upper-case them
print ucase(md(letters, 7, 8))

' statement form: overwrite those two characters in place
md(letters, 7, 8) = "AA"
print letters

sleep
 
paul doe
Moderator
Posts: 1730
Joined: Jul 25, 2017 17:22
Location: Argentina

Re: substrings

Post by paul doe »

jimg wrote:This is all very interesting, but do you think there is any chance of this notation making it into FreeBasic?
Hardly, I'm afraid (although perhaps in the future?...)

The best you can do for now is create your own string type:

Code: Select all

' MyString: a small string type (derived from ZString) that stores its text in
' a private String and adds between(), which returns the characters from one
' position through another.
type MyString extends ZString
  public:
    declare constructor()
    declare constructor(byref as ZString)
    declare destructor()

    ' assignment from, and conversion to, zero-terminated text
    declare operator let(byref as ZString)
    declare operator cast() byref as const ZString

    ' characters from position aStart through position anEnd (as used by Mid)
    declare function between(byval as integer, byval as integer) as MyString

  private:
    as string _string
end type

' Default constructor: starts out empty.
constructor MyString()
  _string = ""
end constructor

' Constructs the object from existing text.
constructor MyString(byref rhs as ZString)
  _string = rhs
end constructor

' Nothing to release explicitly.
destructor MyString()
end destructor

' Assignment: replaces the stored text.
operator MyString.let(byref rhs as ZString)
  _string = rhs
end operator

' Conversion: exposes the stored text as a read-only ZString reference.
operator MyString.cast() byref as const ZString
  return *strptr(_string)
end operator

' Returns the characters from aStart through anEnd, or an empty MyString when
' anEnd lies before aStart.
function MyString.between(byval aStart as integer, byval anEnd as integer) as MyString
  dim piece as string

  if anEnd >= aStart then
    piece = mid(_string, aStart, (anEnd - aStart) + 1)
  end if

  return piece
end function

' Demo: print positions 1 through 5 of the sample text.
dim h as MyString = "Hello World!"

print h.between(1, 5)

sleep
But naturally that's no substitute...
MrSwiss
Posts: 3910
Joined: Jun 02, 2013 9:27
Location: Switzerland

Re: substrings

Post by MrSwiss »

jimg wrote:... if I wanted the substring starting at character c and ending at character d, it would be a$(c:d) rather than mid$(a$,c+1,d-c+1) ...
Along those lines from first post:

Code: Select all

' SubString_Proc.bas -- (c) 2020-02-10, MrSwiss
'
' compile: -s console
'

' dblquot(s): wraps a string in double-quote characters
#Define dblquot(s) (Chr(34) + (s) + Chr(34))

' SubString(source, first char, last char) - implemented further down
Declare Function SubString(Byval As ZString Ptr, Byval As ZString Ptr, Byval As ZString Ptr) As String

' ===== DEMO code =====
Dim sample As String = "FreeBASIC is great for DIY procedure writing!"

Print "testing SubString() procedure ..."
Print
Print "original: "; dblquot(sample)
Print
Print

' each result is shown in double quotes so that empty results are visible
Print dblquot(SubString(sample, "B", "t"))  ' OK
Print dblquot(SubString("",     "B", "t"))  ' empty source string (NOK)
Print dblquot(SubString(sample, "i", "D"))  ' OK
Print dblquot(SubString(sample, "t", "w"))  ' OK
Print dblquot(SubString(sample, "I", "e"))  ' OK
Print dblquot(SubString(sample, "Y", "z"))  ' "z" isn't in original string (NOK)

Print
Print
Print "... done ... ";
Sleep
' ===== end DEMO code =====
' ===== end DEMO code =====

' implement declared Function
' SubString: returns the part of *psrc that runs from the first occurrence of
' the start character through the last occurrence of the end character
' (both inclusive).
'
'   psrc : source text
'   pfst : text whose first character is the start character
'   plst : text whose first character is the end character
'
' Returns "" (the error value) when a pointer is null, a character text is
' empty, either character is not found, or the last occurrence of the end
' character lies before the first occurrence of the start character.
Private Function SubString( _           ' return a SubString ...
    ByVal psrc  As ZString Ptr, _       ' source ptr
    Byval pfst  As ZString Ptr, _       ' first char. to return
    Byval plst  As ZString Ptr  _       ' (until) last char. to return
    ) As String                         ' SubString | "" = ERROR
    ' nothing to search in / search for
    If psrc = 0 OrElse pfst = 0 OrElse plst = 0 Then Return ""
    If pfst[0] = 0 OrElse plst[0] = 0 Then Return ""

    Var sp = InStr(*psrc, Chr(pfst[0]))     ' first occurrence of start char.
    Var ep = InStrRev(*psrc, Chr(plst[0]))  ' last occurrence of end char.

    ' ep >= sp also implies ep > 0; it additionally rejects an end character
    ' that only occurs before the start character - a negative length would
    ' make Mid return everything from sp to the end of the string
    If sp > 0 AndAlso ep >= sp Then
        Var cnt = (ep - sp) + 1         ' calc. SubString's length
        Return Mid(*psrc, sp, cnt)      ' return result
    End If
    Return ""                           ' return ERROR
End Function
' ----- EOF -----
It takes everything from:
- the first occurrence of the start character -- to --
- the last occurrence of the end character (in the source string).
In case of an ERROR (typically: one or both characters 'not found') the return value is:
- an empty string (could be changed of course) ...
Post Reply