substrings
Re: substrings
@Vortex
your latest version's times are the same as dodicat's string2
nicely done :-)
your latest version's times are the same as dodicat's string2
nicely done :-)
Re: substrings
Hi srvaldez,
Could you repeat the test with the following modifications?
Could you repeat the test with the following modifications?
Code: Select all
' Requested change to the benchmark: use this fixed source string and pass
' the literal arguments 10 and 65 to both routines.
MyStr="This is a very very very very very very very very very long test string."
ans= SubString(MyStr,10,65)
ans= SubString2(MyStr,10,65)
Re: substrings
Hi Vortex, here are the times using your modifications
note: PTR time = SubString2
for convenience, here's the test code, in case others want to try
note: PTR time = SubString2
Code: Select all
32-bit -gen gcc -O 2
total ASM time 2.362902699985725
total PTR time 2.842502399996818
64-bit -gen gcc -O 2
total ASM time 1.750834400038002
total PTR time 2.306091299979016
Code: Select all
'function substring2 by dodicat
'https://www.freebasic.net/forum/viewtopic.php?p=268485#p268485
'
' Returns the characters of _in from 0-based offset x through 0-based
' offset y, inclusive (the same index convention the ASM SubString in this
' test uses for StrStart/StrEnd).
'
' _in : source string (not modified)
' x   : 0-based offset of the first character to return
' y   : 0-based offset of the last character to return
'
' Returns "" when the range is empty or lies outside the string. The range
' is clamped to the end of _in and to the capacity of the static buffer.
function substring2(_in as string, x as long,y as long) as string
    static as zstring * 5000 g

    ' Clamp the inclusive end offset to the last valid offset of the source.
    dim as long last = len(_in) - 1
    if y < last then last = y

    ' Nothing to copy: negative start, start past the end, or y < x.
    if x < 0 orelse x > last then return ""

    ' Inclusive range length, limited so the terminator still fits in g.
    dim as long count = last - x + 1
    if count > sizeof(g) - 1 then count = sizeof(g) - 1

    dim as ubyte ptr src  = cast(ubyte ptr, strptr(_in)) + x
    dim as ubyte ptr dest = cast(ubyte ptr, @g)
    for n as long = 0 to count - 1
        dest[n] = src[n]
    next

    ' g is static, so a previous (longer) result would otherwise leak
    ' through after the freshly copied bytes.
    dest[count] = 0

    return g
end function
#if sizeof(integer)=4 '32 bits
'by Vortex
'https://www.freebasic.net/forum/viewtopic.php?p=268605#p268605
'
' SubString (32-bit variant, compiled when sizeof(integer)=4).
' Returns the part of s that starts at 0-based byte offset StrStart and runs
' through offset StrEnd inclusive.
' Method: the byte just after offset StrEnd is temporarily overwritten with 0,
' the zero-terminated text starting at offset StrStart is copied into the
' function result, and the overwritten byte is then put back.
' NOTE(review): there is no bounds check; the code writes to offset StrEnd+1
' of the source buffer whatever the length of s is.
' NOTE(review): esi, edi and bl are modified without being saved, and the
' second Asm block assumes esi and bl still hold their values after the
' "Function=*z" statement - confirm this holds for the backend in use.
Function SubString stdcall alias "SubString" ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String
Dim As Zstring Ptr z
Asm
mov ecx,DWORD PTR [s] ' ecx = value of parameter s (presumably the address of the string descriptor - confirm)
mov esi,DWORD PTR [ecx] ' esi = first dword at that address, used below as the character data pointer
mov edi,esi
add esi,DWORD PTR [StrEnd]
inc esi ' esi -> byte just after offset StrEnd
mov bl,BYTE PTR [esi] ' remember the byte that is about to be overwritten
mov Byte PTR [esi],0 ' temporary terminator inside the source buffer
add edi,DWORD PTR [StrStart] ' edi -> first byte of the requested range
mov DWORD PTR [z],edi
End Asm
' Copy the zero-terminated text at z into the function result.
Function=*z
Asm
mov BYTE PTR [esi],bl ' put the original byte back
End Asm
End Function
#elseif sizeof(integer)=8 '64 bits
'by Vortex
'https://www.freebasic.net/forum/viewtopic.php?p=268605#p268605
'
' SubString (64-bit variant, compiled when sizeof(integer)=8).
' Same steps as the 32-bit variant above, using 64-bit registers and QWORD
' operands; the same review notes apply (no bounds check, rsi/rdi/bl modified
' without being saved, rsi and bl assumed intact after "Function=*z").
Function SubString alias "SubString" ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String
Dim As Zstring Ptr z
Asm
mov rcx,QWORD PTR [s] ' rcx = value of parameter s (presumably the address of the string descriptor - confirm)
mov rsi,QWORD PTR [rcx] ' rsi = first qword at that address, used below as the character data pointer
mov rdi,rsi
add rsi,QWORD PTR [StrEnd]
inc rsi ' rsi -> byte just after offset StrEnd
mov bl,BYTE PTR [rsi] ' remember the byte that is about to be overwritten
mov BYTE PTR [rsi],0 ' temporary terminator inside the source buffer
add rdi,QWORD PTR [StrStart] ' rdi -> first byte of the requested range
mov QWORD PTR [z],rdi
End Asm
' Copy the zero-terminated text at z into the function result.
Function=*z
Asm
mov BYTE PTR [rsi],bl ' put the original byte back
End Asm
End Function
#endif
'test code by dodicat
'
' Benchmark driver: runs five rounds, each timing lim calls of SubString
' (ASM) and lim calls of SubString2 (pointer copy) on the same arguments,
' prints the per-round time and last result, then the accumulated totals.
Dim As String MyStr, ans
Dim As Double startTime, stopTime, asmTotal, ptrTotal

MyStr = "This is a very very very very very very very very very long test string."
'MyStr=MyStr+MyStr

Dim As Long ln  = Len(MyStr) - 7
Dim As Long lim = 10000000 \ 2

' WARMUP
For i As Long = 1 To lim
    Rnd
Next i

For round As Long = 1 To 5
    ' --- ASM version ---
    startTime = Timer
    For i As Long = 1 To lim
        ans = SubString(MyStr, 10, ln)
    Next i
    stopTime = Timer
    asmTotal += stopTime - startTime
    Print stopTime - startTime, "'"; ans; "'", "ASM"
    ' Show the source string after the ASM calls.
    Print MyStr

    ' --- pointer version ---
    startTime = Timer
    For i As Long = 1 To lim
        ans = SubString2(MyStr, 10, ln)
    Next i
    stopTime = Timer
    ptrTotal += stopTime - startTime
    Print stopTime - startTime, "'"; ans; "'", "Pointer"
    Print
Next round

Print
Print "total ASM time "; asmTotal
Print "total PTR time "; ptrTotal
Sleep
Re: substrings
If this is a race then I'll pull out all the stops.
Code: Select all
'function substring2 by dodicat
'https://www.freebasic.net/forum/viewtopic.php?p=268485#p268485
#include "crt.bi"
' Returns, by reference, the characters of _in from 0-based offset x through
' 0-based offset y inclusive.
'
' _in : source string (not modified)
' x   : 0-based offset of the first character to return
' y   : 0-based offset of the last character to return
'
' The result lives in a static buffer: it is overwritten by the next call,
' so copy it if it must outlive that. An empty or out-of-range request
' yields an empty string. The range is clamped to the end of _in and to the
' capacity of the buffer.
function substring2(_in as string, x as long,y as long) byref as const zstring
    static as zstring * 5000 g=""

    ' Clamp the inclusive end offset to the last valid offset of the source.
    dim as long last = len(_in) - 1
    if y < last then last = y

    ' Inclusive range length; stays 0 for a negative start, a start past the
    ' end of the string, or y < x.
    dim as long count = 0
    if x >= 0 andalso x <= last then count = last - x + 1

    ' Leave room for the terminator.
    if count > sizeof(g) - 1 then count = sizeof(g) - 1

    if count > 0 then
        memcpy(cast(ubyte ptr,@g),Cast(Ubyte Ptr,Strptr(_in)) + x, count)
    end if

    ' g is static: without an explicit terminator the tail of a previous,
    ' longer result would remain visible after the copied bytes.
    cast(ubyte ptr,@g)[count] = 0

    return g
end function
#if sizeof(integer)=4 '32 bits
'by Vortex
'https://www.freebasic.net/forum/viewtopic.php?p=268605#p268605
'
' SubString (32-bit variant, compiled when sizeof(integer)=4).
' Returns the part of s that starts at 0-based byte offset StrStart and runs
' through offset StrEnd inclusive.
' Method: the byte just after offset StrEnd is temporarily overwritten with 0,
' the zero-terminated text starting at offset StrStart is copied into the
' function result, and the overwritten byte is then put back.
' NOTE(review): there is no bounds check; the code writes to offset StrEnd+1
' of the source buffer whatever the length of s is.
' NOTE(review): esi, edi and bl are modified without being saved, and the
' second Asm block assumes esi and bl still hold their values after the
' "Function=*z" statement - confirm this holds for the backend in use.
Function SubString stdcall alias "SubString" ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String
Dim As Zstring Ptr z
Asm
mov ecx,DWORD PTR [s] ' ecx = value of parameter s (presumably the address of the string descriptor - confirm)
mov esi,DWORD PTR [ecx] ' esi = first dword at that address, used below as the character data pointer
mov edi,esi
add esi,DWORD PTR [StrEnd]
inc esi ' esi -> byte just after offset StrEnd
mov bl,BYTE PTR [esi] ' remember the byte that is about to be overwritten
mov Byte PTR [esi],0 ' temporary terminator inside the source buffer
add edi,DWORD PTR [StrStart] ' edi -> first byte of the requested range
mov DWORD PTR [z],edi
End Asm
' Copy the zero-terminated text at z into the function result.
Function=*z
Asm
mov BYTE PTR [esi],bl ' put the original byte back
End Asm
End Function
#elseif sizeof(integer)=8 '64 bits
'by Vortex
'https://www.freebasic.net/forum/viewtopic.php?p=268605#p268605
'
' SubString (64-bit variant, compiled when sizeof(integer)=8).
' Same steps as the 32-bit variant above, using 64-bit registers and QWORD
' operands; the same review notes apply (no bounds check, rsi/rdi/bl modified
' without being saved, rsi and bl assumed intact after "Function=*z").
Function SubString alias "SubString" ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String
Dim As Zstring Ptr z
Asm
mov rcx,QWORD PTR [s] ' rcx = value of parameter s (presumably the address of the string descriptor - confirm)
mov rsi,QWORD PTR [rcx] ' rsi = first qword at that address, used below as the character data pointer
mov rdi,rsi
add rsi,QWORD PTR [StrEnd]
inc rsi ' rsi -> byte just after offset StrEnd
mov bl,BYTE PTR [rsi] ' remember the byte that is about to be overwritten
mov BYTE PTR [rsi],0 ' temporary terminator inside the source buffer
add rdi,QWORD PTR [StrStart] ' rdi -> first byte of the requested range
mov QWORD PTR [z],rdi
End Asm
' Copy the zero-terminated text at z into the function result.
Function=*z
Asm
mov BYTE PTR [rsi],bl ' put the original byte back
End Asm
End Function
#endif
'test code by dodicat
'
' Benchmark driver: five rounds, each timing lim calls of the ASM SubString
' and lim calls of the pointer-based SubString2 with identical arguments.
' Every round prints its elapsed time and the last result; the accumulated
' totals are printed at the end.
Dim As String MyStr, ans
Dim As Double tBegin, tEnd, asmTotal, ptrTotal

MyStr = "This is a very very very very very very very very very long test string."
'MyStr=MyStr+MyStr

Dim As Long ln  = Len(MyStr) - 7
Dim As Long lim = 10000000 \ 2

' WARMUP
For i As Long = 1 To lim
    Rnd
Next i

For round As Long = 1 To 5
    ' --- ASM version ---
    tBegin = Timer
    For i As Long = 1 To lim
        ans = SubString(MyStr, 10, ln)
    Next i
    tEnd = Timer
    asmTotal += tEnd - tBegin
    Print tEnd - tBegin, "'"; ans; "'", "ASM"
    ' Show the source string after the ASM calls.
    Print MyStr

    ' --- pointer version ---
    tBegin = Timer
    For i As Long = 1 To lim
        ans = SubString2(MyStr, 10, ln)
    Next i
    tEnd = Timer
    ptrTotal += tEnd - tBegin
    Print tEnd - tBegin, "'"; ans; "'", "Pointer"
    Print
Next round

Print
Print "total ASM time "; asmTotal
Print "total PTR time "; ptrTotal
Sleep
Last edited by dodicat on Feb 09, 2020 13:35, edited 1 time in total.
Re: substrings
Very fast indeed, dodicat, but:... so it continues at 00401C80:... and 00401A50 is this one:
Now, just in case you are wondering what this plethora of disassembly lines means: Function=*z ;-)
Code: Select all
push 0 ; ┌Arg5 = 0
push 0 ; │Arg4 = 0
push dword ptr [ebp-10] ; │Arg3 => [LOCAL.4]
push -1 ; │Arg2 = -1
lea eax, [ebp-0C] ; │
push eax ; │Arg1 => offset LOCAL.3
call 00401C80 ; └TmpFb.00401C80
Code: Select all
sub esp, 2C ; TmpFb.00401C80(guessed Arg1,Arg2,Arg3,Arg4,Arg5)
mov eax, [esp+40]
mov dword ptr [esp+14], 0 ; ┌Arg6 => 0
mov [esp+10], eax ; │Arg5 => [Arg5]
mov eax, [esp+3C] ; │
mov [esp+0C], eax ; │Arg4 => [Arg4]
mov eax, [esp+38] ; │
mov [esp+8], eax ; │Arg3 => [Arg3]
mov eax, [esp+34] ; │
mov [esp+4], eax ; │Arg2 => [Arg2]
mov eax, [esp+30] ; │
mov [esp], eax ; │Arg1 => [Arg1]
call 00401A50 ; └TmpFb.00401A50
sub esp, 18
add esp, 2C
retn 14
Code: Select all
push ebp ; TmpFb.00401A50(guessed Arg1,Arg2,Arg3,Arg4,Arg5,Arg6)
push edi
push esi
push ebx
sub esp, 2C
mov ebx, [esp+40]
mov esi, [esp+48]
test ebx, ebx
jz short 00401AC0
test esi, esi
jz short 00401AE0
cmp dword ptr [esp+4C], -1
je 00401B84
mov [esp], esi ; ┌string => [Arg3]
call <jmp.&msvcrt.strlen> ; └MSVCRT.strlen
mov ecx, esi
mov edi, eax
cmp dword ptr [esp+44], -1
je 00401B20
test edi, edi
jz short 00401AEB
mov edx, [esp+44]
mov ebp, edi
test edx, edx
jnz 00401B94
mov [esp+8], ebp ; ┌Arg3
mov [esp+4], ecx ; │Arg2
mov [esp], ebx ; │Arg1 => [Arg1]
call 00403100 ; └TmpFb.00403100
sub esp, 0C
mov eax, [esp+50]
test eax, eax
jnz short 00401AFC
lea esi, [esi]
lea edi, [edi]
cmp dword ptr [esp+4C], -1
je 00401BD1
add esp, 2C
mov eax, ebx
pop ebx
pop esi
pop edi
pop ebp
retn 18
Re: substrings
well done dodicat :-)
nothing wrong with a little contest every now and then. :-)
nothing wrong with a little contest every now and then. :-)
Code: Select all
32-bit -gen gcc -O 2
total ASM time 2.334638500018031
total PTR time 0.7207269000392387
64-bit -gen gcc -O 2
total ASM time 1.744253500044579
total PTR time 0.3828349999967031
Re: substrings
Try this instead:
Code: Select all
' Destination buffer shared by every SubStringJ call (8000 zeroed bytes).
Dim shared as zstring ptr retStr
retStr = CAllocate(8000)   ' pointer to destination

' SubStringJ (32-bit x86 only, naked stdcall).
' Copies the bytes of s from 0-based offset StrStart through offset StrEnd
' inclusive into the shared retStr buffer, appends a terminating 0 and
' returns the buffer address in eax.
'
' Fixes relative to the first posted version:
'  - s arrives as the address of the string descriptor, so the character
'    data pointer must be loaded from it (mov esi,[esi]) before copying;
'  - the byte count is StrEnd-StrStart+1 rather than StrEnd;
'  - a range with StrEnd < StrStart copies nothing.
'
' NOTE(review): no check against the length of s or the 8000-byte buffer.
' NOTE(review): the function is declared "As String" but leaves a raw
' zstring address in eax - confirm this matches how the compiler expects a
' String result to be returned from a naked function.
Function SubStringJ naked stdcall ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String
  Asm   ' SubStringJ(MyStr,10,ln)
    push esi
    push edi
    ' After the two pushes: [esp+8]=return address, [esp+12]=s,
    ' [esp+16]=StrStart, [esp+20]=StrEnd
    mov esi, [esp+12]   ' address of the string descriptor
    mov esi, [esi]      ' descriptor -> character data
    mov ecx, [esp+20]   ' StrEnd
    sub ecx, [esp+16]   ' StrEnd - StrStart (carry set when StrEnd < StrStart)
    sbb eax, eax        ' eax = -1 on borrow, else 0
    not eax             ' eax = 0 on borrow, else -1
    inc ecx             ' inclusive range length
    and ecx, eax        ' force a zero count for an inverted range
    add esi, [esp+16]   ' source = data + StrStart
    mov edi, [retStr]
    push edi            ' keep the buffer start for the return value
    rep movsb
    xor eax, eax
    stosb               ' terminating 0
    pop eax             ' eax = buffer start
    pop edi
    pop esi
    ret 3*4
  End Asm
End Function
Re: substrings
JJ
It looks to me like you are cheating: you are allocating a fixed amount of memory in a global variable, which is not a clean, self-contained function,
and it only works in gas32.
It looks to me like you are cheating: you are allocating a fixed amount of memory in a global variable, which is not a clean, self-contained function,
and it only works in gas32.
Re: substrings
Yes, I am cheating. But what dodicat posted is not a clean "assembly" version, either, because there are several other functions involved. My function just makes a memcopy, and therefore the timing is roughly the same as for the built-in FB function.
Re: substrings
Hi jj2007
I get an empty string return.
Win 10
gas and gcc
I get an empty string return.
Code: Select all
' Destination buffer shared by every SubStringJ call (8000 zeroed bytes).
Dim shared as zstring ptr retStr
retStr = CAllocate(8000)   ' pointer to destination

' SubStringJ (32-bit x86 only, naked stdcall).
' Copies the bytes of s from 0-based offset StrStart through offset StrEnd
' inclusive into the shared retStr buffer, appends a terminating 0 and
' returns the buffer address in eax.
'
' Fixes relative to the version as originally quoted here:
'  - s arrives as the address of the string descriptor, so the character
'    data pointer must be loaded from it (mov esi,[esi]) before copying;
'  - the byte count is StrEnd-StrStart+1 rather than StrEnd;
'  - a range with StrEnd < StrStart copies nothing.
'
' NOTE(review): no check against the length of s or the 8000-byte buffer.
' NOTE(review): the function is declared "As String" but leaves a raw
' zstring address in eax - confirm this matches how the compiler expects a
' String result to be returned from a naked function.
Function SubStringJ naked stdcall ( s As String,StrStart As Uinteger,StrEnd As Uinteger ) As String
  Asm   ' SubStringJ(MyStr,10,ln)
    push esi
    push edi
    ' After the two pushes: [esp+8]=return address, [esp+12]=s,
    ' [esp+16]=StrStart, [esp+20]=StrEnd
    mov esi, [esp+12]   ' address of the string descriptor
    mov esi, [esi]      ' descriptor -> character data
    mov ecx, [esp+20]   ' StrEnd
    sub ecx, [esp+16]   ' StrEnd - StrStart (carry set when StrEnd < StrStart)
    sbb eax, eax        ' eax = -1 on borrow, else 0
    not eax             ' eax = 0 on borrow, else -1
    inc ecx             ' inclusive range length
    and ecx, eax        ' force a zero count for an inverted range
    add esi, [esp+16]   ' source = data + StrStart
    mov edi, [retStr]
    push edi            ' keep the buffer start for the return value
    rep movsb
    xor eax, eax
    stosb               ' terminating 0
    pop eax             ' eax = buffer start
    pop edi
    pop esi
    ret 3*4
  End Asm
End Function
' Minimal reproduction: call SubStringJ once and print the result in quotes.
Dim As String MyStr
Dim As String ans

MyStr = "This is a very very very very very very very very very long test string."

Dim As Long ln = Len(MyStr) - 7

ans = SubStringj(MyStr, 10, ln)
Print "'"; ans; "'"
Sleep
gas and gcc
Re: substrings
Yes, there is a problem: FB doesn't pass a pointer to the string but rather a reference to a pointer. Add mov esi, [esi]
Re: substrings
This is all very interesting, but do you think there is any chance of this notation making it into FreeBasic?
Re: substrings
In your first post
..
a$(c:d) rather than mid$(a$,c+1,d-c+1)
..
So you can just make a macro to save writing it out, and mid is very fast, and mid can be a function or a statement, thus it is versatile.
..
a$(c:d) rather than mid$(a$,c+1,d-c+1)
..
So you can just make a macro to save writing it out, and mid is very fast, and mid can be a function or a statement, thus it is versatile.
Code: Select all
' md(s,c,d): shorthand for the proposed a$(c:d) notation - the characters of
' s from 0-based index c through 0-based index d inclusive. Expands to Mid,
' so it works both as a function and as an assignment target.
' c and d are parenthesized so that expression arguments such as md(s,i+1,j)
' expand with the intended precedence. s is left bare because the Mid
' statement form needs a plain variable there.
#define md(s,c,d) mid(s,(c)+1,(d)-(c)+1)
' Demo of md(): read a two-character slice, then overwrite the same slice.
dim alphabet as string = "abcdefghijklmnopqrstuvwxyz"

' function form: print the slice in upper case
print ucase( md(alphabet,7,8) )

' statement form: replace the slice in place
md(alphabet,7,8) = "AA"
print alphabet

sleep
Re: substrings
Hardly, I'm afraid (although perhaps in the future?...) -- in reply to jimg, who wrote: "This is all very interesting, but do you think there is any chance of this notation making it into FreeBasic?"
The best you can do for now is create your own string type:
Code: Select all
' MyString: a small string wrapper that extends ZString and adds a
' between( first, last ) member returning the characters from position
' first through position last (positions as used by Mid).
type MyString extends ZString
  public:
    declare constructor()
    declare constructor( byref as ZString )
    declare destructor()
    declare operator let( byref as ZString )
    declare operator cast() byref as const ZString
    declare function between( byval as integer, byval as integer ) as MyString

  private:
    as string _string   ' backing storage
end type

' Default construction: start out empty.
constructor MyString()
  _string = ""
end constructor

' Construct from zero-terminated text.
constructor MyString( byref rhs as ZString )
  _string = rhs
end constructor

' Nothing to release explicitly.
destructor MyString()
end destructor

' Assignment from zero-terminated text.
operator MyString.let( byref rhs as ZString )
  _string = rhs
end operator

' Expose the stored text as a read-only ZString.
operator MyString.cast() byref as const ZString
  return *strPtr( _string )
end operator

' Returns positions aStart..anEnd of the stored text as a new MyString;
' an inverted range (anEnd < aStart) gives an empty result.
function MyString.between( byval aStart as integer, byval anEnd as integer ) as MyString
  return iif( anEnd < aStart, "", mid( _string, aStart, ( anEnd - aStart ) + 1 ) )
end function
' Demo: print positions 1..5 of the text held by a MyString.
dim as MyString h => "Hello World!"

print h.between( 1, 5 )

sleep
Re: substrings
Along those lines, from the first post, jimg wrote: "... if I wanted the substring starting at character c and ending at character d, it would be a$(c:d) rather than mid$(a$,c+1,d-c+1) ..."
Code: Select all
' SubString_Proc.bas -- (c) 2020-02-10, MrSwiss
'
' compile: -s console
'
' dblquot(s): wrap a string in double quotes (Chr(34))
#Define dblquot(s) ( Chr(34) + (s) + Chr(34) )

' Forward declaration; the implementation follows the demo code.
Declare Function SubString( ByVal As ZString Ptr, ByVal As ZString Ptr, ByVal As ZString Ptr ) As String

' ===== DEMO code =====
Dim tst As String = "FreeBASIC is great for DIY procedure writing!"

Print "testing SubString() procedure ..."
Print
Print "original: "; dblquot(tst)
Print
Print

Print dblquot( SubString(tst, "B", "t") )   ' OK
Print dblquot( SubString("", "B", "t") )    ' empty source string (NOK)
Print dblquot( SubString(tst, "i", "D") )   ' OK
Print dblquot( SubString(tst, "t", "w") )   ' OK
Print dblquot( SubString(tst, "I", "e") )   ' OK
Print dblquot( SubString(tst, "Y", "z") )   ' "z" isn't in original string (NOK)

Print
Print
Print "... done ... ";
Sleep
' ===== end DEMO code =====
' implement declared Function
' Returns the part of the source text that starts at the first occurrence
' of the start character and ends at the last occurrence of the end
' character, both inclusive.
' Returns "" on error: a null pointer, a character that is not found, or an
' end character whose last occurrence lies before the start character.
Private Function SubString( _           ' return a SubString ...
        ByVal psrc As ZString Ptr, _    ' source ptr
        ByVal pfst As ZString Ptr, _    ' first char. to return
        ByVal plst As ZString Ptr _     ' (until) last char. to return
    ) As String                         ' SubString | "" = ERROR
    ' Dereferencing a null pointer below would crash; treat it as an error.
    If psrc = 0 OrElse pfst = 0 OrElse plst = 0 Then Return ""

    Var sp = InStr(*psrc, pfst[0])      ' first occurrence of start char.
    Var ep = InStrRev(*psrc, plst[0])   ' last occurrence of end char.

    ' sp > 0 means the start char. was found; ep >= sp additionally rules
    ' out "end char. not found" (ep = 0) and an end that precedes the start.
    ' The latter would give a negative length, which Mid treats as "rest of
    ' the string" instead of an error.
    If sp > 0 AndAlso ep >= sp Then
        Var cnt = (ep - sp) + 1         ' calc. SubString's length
        Return Mid(*psrc, sp, cnt)      ' return result
    End If

    Return ""                           ' return ERROR
End Function
' ----- EOF -----
- first occurrence of the start character -- to --
- last occurrence of the end character (in the source string)
In case of ERROR (typically: one or both characters not found) the return value is:
- an empty string (could be changed, of course) ...