Fast string searching

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
AGS
Posts: 1284
Joined: Sep 25, 2007 0:26
Location: the Netherlands

Fast string searching

Post by AGS »

I've implemented the Horspool stringsearching algorithm in FreeBASIC.

Knowing that Instr has been implemented using the BoyerMoore algorithm as well I was wondering how FreeBASIC code would do when competing with optimized C code. Instr turns out to be faster but Boyer Moore Horspool is a lot faster when compared with searching for a string within a piece of text by brute force.

I've put up some test data so you can see how 'fast' the FreeBASIC version of BoyerMooreHorspool is. I've uploaded a file containing nothing but random numbers (range 0 - 1200) and a file containing the FreeBASIC manual in raw format.

Code: Select all

#include "crt/string.bi"


Function BruteForce(ByVal pattern As ZString Ptr, ByVal pattern_len As Integer, _
                    ByVal text As ZString Ptr, ByVal text_len As Integer) As Integer
   
  Dim As Integer i, j
  
  If (pattern = 0 OrElse pattern_len = 0 OrElse text = 0 OrElse text_len = 0) Then
    Return 0
  End If   
   
  'searching
  For j = 0 To (text_len - pattern_len)
     i = 0
     While (pattern[0][i] = text[0][i+j] AndAlso i < pattern_len)
       i += 1
     Wend			
     If (i >= pattern_len) Then         
        Return j + 1
     End If
     
  Next j
  Return 0
End Function



Sub PreprocessBoyerMooreBadCharacter(ByVal pattern As ZString Ptr, _
                                     ByVal pattern_len As Integer, BoyerMooreBadChar() As Integer) 
   
   Dim i As Integer
   
   For i = 0 To 255
    BoyerMooreBadChar(i) = pattern_len
   Next i
   
   
   i = 0
   While (i < pattern_len - 1)	 
      BoyerMooreBadChar(pattern[0][i]) = pattern_len - i - 1
      i += 1
   Wend	
   
End Sub

 

Function BoyerMooreHorspool(ByVal pattern As ZString Ptr, ByVal pattern_len As Integer, _
                    ByVal text As ZString Ptr, ByVal text_len As Integer) As Integer
   
   Dim As Integer position, current_character
   Dim As Integer BoyerMooreBadCharacterShift(0 To 255) 

  If (pattern = 0 OrElse pattern_len = 0 OrElse text = 0 OrElse text_len = 0) Then
    Return 0
  End If
   
   /' Preprocessing '/
   preprocessBoyerMooreBadCharacter(pattern, pattern_len, BoyerMooreBadCharacterShift())

   /' Searching '/
   position = 0
   While (position <= text_len - pattern_len) 
      current_character = text[0][position + pattern_len - 1]
      If (pattern[0][pattern_len - 1] = current_character AndAlso _
          memcmp(pattern, text + position, pattern_len - 1) = 0) Then
         Return position + 1
      End If
      position += BoyerMooreBadCharacterShift(current_character)
   Wend
   Return 0
End Function


Sub main()
  Dim As String pattern
  Dim As Double t1,t2	
  Dim As String text
  Dim As Integer fhandle  
  Dim As String filename = "numbers.txt"
  fhandle = FreeFile()
  
  If Open( filename For Binary Access Read As #fhandle ) <> 0 Then 
    Exit Sub
  End If
  
  If (LOF(fhandle) > 0) Then			
    text = String(LOF(fhandle), 0)
    If (Get( #fhandle, ,text ) <> 0) Then 
      Exit Sub
    End If			
  End If
  
  Close #fhandle 
  
  
  pattern = !"1196"
  
  t1 = Timer()
  Var result = BruteForce(StrPtr(pattern),Len(pattern),StrPtr(text),Len(text))
  t2 = Timer()	
  Print Using "Bruteforce         ######.#######";t2 - t1
  Print result
  
  t1 = Timer()
  result = BoyerMooreHorspool(StrPtr(pattern),Len(pattern),StrPtr(text),Len(text))
  t2 = Timer()
  Print Using "BoyerMooreHorspool ######.#######";t2 - t1
  Print result	 
  
  
  t1 = Timer()
  result = Instr(text,pattern)
  t2 = Timer()
  Print Using "Instr              ######.#######";t2 - t1
  Print result	
        
End Sub

main()
Post Reply