simple, dirty huffman coding example

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
Mindless
Posts: 110
Joined: Jun 25, 2005 14:50
Location: USA

simple, dirty huffman coding example

Post by Mindless »

yup, this code is dirty...

I'm not sure I'm handling files that have only one type of byte in them since the root of the binary tree is a leaf node... :/
don't try to compress a 0 byte file, there is no error checking

Code: Select all

option explicit

'' simple binary tree
namespace bt

  type node_t
    index      as integer
    node_flags as ubyte
    node(1)    as node_t ptr
  end type
  
  const leaf_node = 0, branch_node = 1
  
  function create_leaf (index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    
    node_new->index = index
    node_new->node_flags = leaf_node
    
    return node_new
  end function
  
  function create_branch (index as integer = 0, byval node0 as node_t ptr = 0, byval node1 as node_t ptr = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    
    node_new->index = index
    node_new->node_flags = branch_node
    node_new->node(0) = node0
    node_new->node(1) = node1
    
    return node_new
  end function
  
  function destroy (byval node_given as node_t ptr) as ubyte
    if node_given->node_flags and branch_node then
      if node_given->node(0) then destroy(node_given->node(0))
      if node_given->node(1) then destroy(node_given->node(1))
    end if
    
    deallocate(node_given)
        
    return 0
  end function
  
  function calculate_size (byval node_given as node_t ptr) as uinteger
    dim as uinteger size
    
    if node_given->node_flags and branch_node then
      if node_given->node(0) then size += calculate_size(node_given->node(0))
      if node_given->node(1) then size += calculate_size(node_given->node(1))
    else
      size += 8
    end if
    size += 1
        
    return size
  end function
  
  function save_tree (byval node_given as node_t ptr, bt_data as ubyte ptr, bt_data_ptr as uinteger = 0, bt_data_bit_ptr as byte = 8) as ubyte
    if bt_data_bit_ptr = 0 then
      bt_data_ptr += 1
      bt_data_bit_ptr = 8
    end if
    bt_data_bit_ptr -= 1
    
    if node_given->node_flags and branch_node then
      bt_data[bt_data_ptr] = bitreset(bt_data[bt_data_ptr], bt_data_bit_ptr)
      if node_given->node(0) then save_tree(node_given->node(0), bt_data, bt_data_ptr, bt_data_bit_ptr)
      if node_given->node(1) then save_tree(node_given->node(1), bt_data, bt_data_ptr, bt_data_bit_ptr)
    else
      bt_data[bt_data_ptr] = bitset(bt_data[bt_data_ptr], bt_data_bit_ptr)
      dim as ubyte i
      for i = 0 to 7
        if bt_data_bit_ptr = 0 then
          bt_data_ptr += 1
          bt_data_bit_ptr = 8
        end if
        bt_data_bit_ptr -= 1
        if bit(node_given->index, 7 - i) then
          bt_data[bt_data_ptr] = bitset(bt_data[bt_data_ptr], bt_data_bit_ptr)
        else
          bt_data[bt_data_ptr] = bitreset(bt_data[bt_data_ptr], bt_data_bit_ptr)
        end if
      next
    end if
    
    return 0
  end function
  
  function load_tree (byval bt_data as ubyte ptr, bt_data_ptr as uinteger = 0, bt_data_bit_ptr as byte = 8) as node_t ptr
    dim as node_t ptr return_node
    
    if bt_data_bit_ptr = 0 then
      bt_data_ptr += 1
      bt_data_bit_ptr = 8
    end if
    bt_data_bit_ptr -= 1
    
    if bit(bt_data[bt_data_ptr], bt_data_bit_ptr) = 0 then
      return_node = create_branch(-1)
      return_node->node(0) = load_tree(bt_data, bt_data_ptr, bt_data_bit_ptr)
      return_node->node(1) = load_tree(bt_data, bt_data_ptr, bt_data_bit_ptr)
    else
      dim as uinteger index
      dim as ubyte i
      for i = 0 to 7
        if bt_data_bit_ptr = 0 then
          bt_data_ptr += 1
          bt_data_bit_ptr = 8
        end if
        bt_data_bit_ptr -= 1
        if bit(bt_data[bt_data_ptr], bt_data_bit_ptr) then
          index = bitset(index, 7 - i)
        else
          index = bitreset(index, 7 - i)
        end if
      next
      return_node = create_leaf(index)
    end if
    
    return return_node
  end function
  
  function dump (byval node_given as node_t ptr, indent as ubyte = 0) as ubyte
    if node_given->node_flags and branch_node then
      print string(indent, 32) & "branch: " & node_given->index
      if node_given->node(0) then dump(node_given->node(0), indent + 2)
      if node_given->node(1) then dump(node_given->node(1), indent + 2)
    else
      print string(indent, 32) & "leaf: " & node_given->index
    end if
    
    return 0
  end function
  
  function qtable (byval node_given as node_t ptr, path() as uinteger, bits() as ubyte, byref cpath as uinteger = 0, byref cbits as ubyte = 0) as ubyte
    if cbits > 31 then print "QTABLE FAILED!" : stop
    if node_given->node_flags and branch_node then
      if node_given->node(0) then qtable(node_given->node(0), path(), bits(), (cpath shl 1), cbits + 1)
      if node_given->node(1) then qtable(node_given->node(1), path(), bits(), (cpath shl 1) or 1, cbits + 1)
    else
      path(node_given->index) = cpath
      bits(node_given->index) = cbits
    end if

    return 0
  end function
  
end namespace

'' simple linked list
namespace ll

  type node_t
    index      as integer
    prev_node  as node_t ptr
    next_node  as node_t ptr
    foo        as any ptr
  end type

  const as uinteger no_node = 0
  
  declare function create (index as integer = 0) as node_t ptr
  declare function destroy (byval node_given as node_t ptr) as node_t ptr
  declare function insert_after (byval node_given as node_t ptr, index as integer = 0) as node_t ptr
  declare function delete (byval node_given as node_t ptr) as node_t ptr
  declare function nswap (byval node_given1 as node_t ptr, byval node_given2 as node_t ptr) as ubyte
  declare function seek_first (byval node_given as node_t ptr) as node_t ptr
  declare function sort_index (byval node_given as node_t ptr) as ubyte
  declare function count (byval node_given as node_t ptr) as uinteger
  declare function dump (byval node_given as node_t ptr) as ubyte
  
  function create (index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    node_new->index = index
    return node_new
  end function

  function destroy (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current = node_given
    
    node_current = seek_first(node_current)
    
    do until (node_current->next_node = no_node)
      node_current = node_current->next_node
      deallocate(node_current->prev_node)
    loop
    
    deallocate(node_current)
    
    return no_node
  end function

  function insert_after (byval node_given as node_t ptr, index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    
    if not(node_given->next_node = no_node) then
      node_new->next_node = node_given->next_node
      node_given->next_node->prev_node = node_new
    end if
    
    node_given->next_node = node_new
    node_new->prev_node = node_given
    
    node_new->index = index
    
    return node_new
  end function

  function delete (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current
    
    if (node_given->prev_node = no_node) and (node_given->prev_node = no_node) then
      node_current = no_node
    elseif (node_given->prev_node = no_node) then
      node_current = node_given->next_node
      node_given->next_node->prev_node = no_node
    elseif (node_given->next_node = no_node) then
      node_current = node_given->prev_node
      node_given->prev_node->next_node = no_node
    else
      node_current = node_given->next_node
      node_given->prev_node->next_node = node_given->next_node
      node_given->next_node->prev_node = node_given->prev_node
    end if
    
    deallocate(node_given)
    
    return node_current
  end function

  function nswap (byval node_given1 as node_t ptr, byval node_given2 as node_t ptr) as ubyte
    swap node_given1->index, node_given2->index
    swap node_given1->foo, node_given2->foo
    return 0
  end function

  function seek_first (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current = node_given
    do until (node_current->prev_node = no_node)
      node_current = node_current->prev_node
    loop
    return node_current
  end function

  function sort_index (byval node_given as node_t ptr) as ubyte
    dim as node_t ptr node_current = node_given
    dim as node_t ptr node_grab, node_match
    
    node_current = seek_first(node_current)
    node_grab = node_current
    
    do until (node_grab->next_node = no_node)
      node_match = node_grab
      node_current = node_grab
      do until (node_current->next_node = no_node)
        node_current = node_current->next_node
        if (node_current->index < node_match->index) then node_match = node_current
      loop
      if (node_match <> node_grab) then nswap(node_match, node_grab)
      node_grab = node_grab->next_node
    loop
    
    return 0
  end function

  function count (byval node_given as node_t ptr) as uinteger
    dim as node_t ptr node_current = node_given
    dim as uinteger ncount = 1
    
    node_current = seek_first(node_current)
    
    do until (node_current->next_node = no_node)
      node_current = node_current->next_node
      ncount += 1
    loop
    
    return ncount
  end function

  function dump (byval node_given as node_t ptr) as ubyte
    dim as node_t ptr node_current = node_given
    
    node_current = seek_first(node_current)
    
    print "index", "foo", "previous", "next"
    do
      print hex(node_current->index, 8), hex(node_current->foo, 8),;
      if not(node_current->prev_node = no_node) then
        print hex(node_current->prev_node->index, 8),;
      else
        print " -",;
      end if
      if not(node_current->next_node = no_node) then
        print hex(node_current->next_node->index, 8)
      else
        print " -"
        exit do
      end if
      node_current = node_current->next_node
    loop
    
    return 0
  end function

end namespace

select case command$(1)
case "c"
  dim as ubyte ptr ddata, cdata, tdata
  dim as uinteger ds, cs, ts, di, ci  
  dim as ubyte cb = 8
  
  dim as uinteger  f  = freefile
  open command$(2) for binary access read as #f
  ds = lof(f)
  ddata = callocate(ds)
  get #f, , *ddata, ds
  close #f
  
  print "  building tree..."
  
  dim as uinteger u(255)
  
  for di = 0 to ds - 1
    u(ddata[di]) += 1
  next
  
  dim as ll.node_t ptr l
  dim as bt.node_t ptr b
  
  dim as uinteger i
  for i = 0 to 255
    if u(i) then
      if l then
        l = ll.insert_after(l, u(i))
      else
        l = ll.create(u(i))
      end if
      l->foo = bt.create_leaf(i)
    end if
  next
  
  do while ll.count(l) > 1
    ll.sort_index(l)
    l = ll.seek_first(l)
    
    l->foo = bt.create_branch(-1, l->foo, l->next_node->foo)
    l->index += l->next_node->index
    ll.delete(l->next_node)
  loop
  b = l->foo
  ll.destroy(l)
  
  ts = (bt.calculate_size(b) + 7) \ 8
  tdata = callocate(ts)
  
  bt.save_tree(b, tdata)
  
  dim as uinteger o = freefile
  open command$(3) for binary access write as #o
  put #o, , "HUFF"
  put #o, , ts
  seek #f, 13
  put #o, , ds
  put #o, , *tdata, ts
  
  dim as uinteger qtp(255)
  dim as ubyte qtb(255)
  bt.qtable(b, qtp(), qtb())
  
  print "  compressing...";
  
  cdata = callocate(ds)
  for di = 0 to ds - 1
    if qtb(ddata[di]) then
      for i = 0 to qtb(ddata[di]) - 1
        if cb = 0 then
          ci += 1
          cb = 8
        end if
        cb -= 1
        cdata[ci] shl= 1
        cdata[ci] or= abs(bit(qtp(ddata[di]), qtb(ddata[di]) - i - 1))
      next
    end if
    if di mod 1024 = 0 then locate , 80 - 26 : print " " & string(fix(di / ds * 20), ".") & string(20 - fix(di / ds * 20), "o") & " " & fix(di / ds * 100) & "%";
  next
  
  cdata[ci] shl= cb
  
  put #o, , *cdata, ci + 1
  put #o, 9, ci + 1
  
  bt.destroy(b)
  
  deallocate(ddata)
  deallocate(cdata)
  
case "d"
  dim as ubyte ptr tdata, cdata, ddata
  dim as uinteger  ts, cs, ds, ci, di
  dim as ubyte cb = 8
  
  dim as uinteger f = freefile
  open command$(2) for binary access read as #f
  
  seek #f, 5
  get #f, , ts
  tdata = callocate(ts)
  get #f, , cs
  cdata = callocate(cs)
  get #f, , ds
  ddata = callocate(ds)
  get #f, , *tdata, ts
  get #f, , *cdata, cs
  
  close #f
  
  print "  reading tree..."
  
  dim as bt.node_t ptr tr = bt.load_tree(tdata), tc
  deallocate(tdata)
  
  print "  decompressing...";
  
  do while di < ds
    tc = tr
    do while tc->node_flags and bt.branch_node
      if cb = 0 then
        ci += 1
        cb = 8
      end if
      cb -= 1
      tc = tc->node(abs(bit(cdata[ci], cb)))
    loop
    ddata[di] = tc->index
    di += 1
    if di mod 1024 = 0 then locate , 80 - 26 : print " " & string(fix(di / ds * 20), ".") & string(20 - fix(di / ds * 20), "o") & " " & fix(di / ds * 100) & "%";
  loop
  
  dim as uinteger o = freefile
  open command$(3) for binary access write as #o
  
  put #o, , *ddata, ds
  
  close #o
  
  bt.destroy(tr)
  
  deallocate(cdata)
  deallocate(ddata)
  
case "t"
  dim as ubyte ptr tdata
  dim as uinteger  ts, cs, ds
  
  dim as uinteger f = freefile
  open command$(2) for binary access read as #f
  
  seek #f, 5
  get #f, , ts
  tdata = callocate(ts)
  get #f, , cs
  get #f, , ds
  get #f, , *tdata, ts
  
  close #f
  
  dim as bt.node_t ptr tr = bt.load_tree(tdata), tc
  deallocate(tdata)
  
  bt.dump(tr)
  
  bt.destroy(tr)
  
case else
  print command$(0) & " c|d src dst"
  
end select
Mihail_B
Posts: 273
Joined: Jan 29, 2008 11:20
Location: Romania
Contact:

Re: simple, dirty huffman coding example

Post by Mihail_B »

I had to learn this [Huffman Encoding/Decoding] for one of my univ exams ...

By the way ... you can find so tuturials for different algorithms at http://jhave.org (like LZW, huffman, Topological sort, etc ....,etc...)

This code sample is kind of tutorial/debug for learning huffman encoding(compression)/decoding
For encoding : root=huffman_enc_setup(@in,Len(in),@c)
For decoding : huffman_decoding(@in,@ll,c,Len(*c),root)

Code: Select all

'copyright (c) 1996-2012 alphax (r) team
'programmed by mihai barboi
'thanks to:fmi.unibuc.ro,ocw.mit.edu,elth.pub.ro,wiki,ichb,sc24,jhave.org
'based on my univ courses and JHave ( http://jhave.org )

'Huffman Encoding/Decoding Algorithm ...

Type hu_t
   As Integer freq
   As Integer ch
   As ZString*16 enc
   As hu_t Ptr left1
   As hu_t Ptr right1
   As hu_t Ptr parent
End Type

'like C/C++ function memset(...)
#Ifndef stosb1
sub stosb1(c as ubyte, dst as any ptr, l as uinteger)
        asm
                push edi
                push ecx
                push eax
                pushf
                mov edi,[dst]
                mov ecx,[l]
                mov al,[c]
                cld
                rep stosb
                popf
                pop eax
                pop ecx
                pop edi
        end asm
end Sub
#endif
'I used insertion sort ... 'cause it gives better results for small arrays (we will sort CHARs frequencies)
Sub sort_hu(hu As hu_t Ptr, l As Integer)
   Dim As Integer i,k,z
   Dim As hu_t temp
   For i=1 To l-1
      z=i
      k=i-1
      While ((hu[z].freq<hu[k].freq) AndAlso (k>=0))
         temp=hu[z]
         hu[z]=hu[k]
         hu[k]=temp
         z-=1
         k-=1
      Wend
   Next i
End Sub

'appends encription bits to the left side
Sub append2enc(h As hu_t Ptr, older As UByte)
   If older=0 Then
      h->left1->enc="0"+h->left1->enc
      h->right1->enc="1"+h->right1->enc
   Else
      h->left1->enc=Chr(older)+h->left1->enc
      h->right1->enc=Chr(older)+h->right1->enc
   EndIf
   #Ifndef huffman_as_include
   Print "[L";Chr(h->left1->ch);" ";h->left1->freq;" ";h->left1->enc;"]";
   Print "[R";Chr(h->right1->ch);" ";h->right1->freq;" ";h->right1->enc;"]";
   #EndIf
   If h->left1->left1<>0 Then
      If older=0 then append2enc(h->left1,Asc("0")) Else append2enc(h->left1,older) 
   EndIf
   If h->right1->right1<>0 Then 
      If older=0 then appenD2enc(h->right1,Asc("1")) Else append2enc(h->right1,older)
   EndIf
End Sub

'displays the huffman's binary tree in IN-ORDER (LEFT child, ROOT, RIGHT child)
Sub printtree(h As hu_t Ptr,lvl As Integer)
   If h=0 Then Return
   printtree(h->left1,lvl+1)
   Print "lvl=";lvl;" freq=";h->freq;" enc=[";h->enc;" ch=";Chr(h->ch);"]"
   printtree(h->right1,lvl+1)
End Sub
Function get_huff_enc(hu As hu_t Ptr, c As Integer) As hu_T Ptr
   If hu=0 Then Return(0)
   'Print "c=";c;" freq=";hu->freq;" enc=[";hu->enc;" ch=";Chr(hu->ch);"]"
   If hu->ch=c Then Return(hu)
   Dim As hu_t Ptr i
   i=get_huff_enc(hu->left1,c)
   If i<>0 Then Return(i)
   Return(get_huff_enc(hu->right1,c))
End Function
Function binstr2bytes(c As ZString Ptr, l As Integer,o As UByte Ptr) As Integer
   If c=0 Or l=0 Or o=0 Then Return 0
   Dim As Integer i,n,k
   k=0
   For i=0 To l-1
      'Print(c[i]-Asc("0")),(i And 7),k
      o[k] Or=((c[i]-Asc("0")) Shl (i And 7))
      If ((i+1) And 7)=0 Then k+=1
   Next i
   Return k
End Function
Function bytes2binstr(c As UByte Ptr, l As Integer, o As ZString Ptr) As Integer
   If c=0 Or l=0 Or o=0 Then Return 0
   Dim As Integer i,n,k
   k=0
   n=(l Shl 3)-1
   For i=0 To n
      o[i]= CUByte(IIf(Bit(c[k],i And 7)=0,Asc("0"),Asc("1")))
      If ((i+1) And 7)=0 Then k+=1   
   Next i
   Return n+1
End Function
Function huffman_enc_reuse(hu As hu_t Ptr,s As ZString Ptr, len1 As Integer,enc As Any Ptr) As Integer
   If hu=0 Or s=0 Or len1=0 Or enc=0 Then Return 0
   Dim As Integer i,t
   Dim As ZString Ptr e
   Dim As hu_t Ptr h
   e=Callocate((len1 Shl 3)+1)
   *CPtr(UInteger Ptr,enc)=e
   For i=0 To len1-1
      t=s[i]
      h=get_huff_enc(hu,t)
      If h<>0 Then *e+=h->enc
   Next i
   Return(Len(*e))
End Function
'huffman encription function
Function huffman_enc_setup(s As ZString Ptr,len1 As Integer, enc As Any ptr) As hu_t Ptr
   Dim As Integer i,n,k,la
   Dim As UByte Ptr l=New UByte[256]
   Dim As hu_t Ptr list,old_list
   Dim As hu_t Ptr newh,te
   n=0
   For i=0 To len1-1
      n-=IIf(l[s[i]]=0,0,1)
      l[s[i]]+=1
      n+=1
   Next i
   #Ifndef huffman_as_include
   Print "n=";n
   #endif
   If n=0 Then
      Delete l
      Return 0
   EndIf
   list = New hu_t[n Shl 1]
   k=0
   For i=0 To 255
      If l[i]>0 Then
         stosb1 0,@list[k],Len(hu_t)
         list[k].freq=l[i]
         list[k].ch=i
         list[k].enc=""
         K+=1
      EndIf
   Next i
   k=n
   old_list=list
   la=n
   While (k>1)
      #Ifndef huffman_as_include
      Print "k=";k
      Print "unsorted list:";:For i=0 To k-1: Print list[i].freq;" ";: Next i:Print
      #EndIf
      sort_hu(list,k)
      #Ifndef huffman_as_include
      Print "##sorted list:";:For i=0 To k-1: Print list[i].freq;" ";: Next i:Print
      #EndIf
      ''merge first 2
      newh=@list[k]
      newh->freq=list[0].freq+list[1].freq
      newh->ch=0
      newh->enc=""
      newh->left1=@list[0]
      newh->right1=@list[1]
      newh->parent=0
      append2enc(newh,0)
      list=@list[2]
      k-=1
   Wend
   #Ifndef huffman_as_include
   Print "k=";k
   Print "unsorted list:";:For i=0 To k-1: Print list[i].freq;" ";: Next i:Print
   printtree(@list[0],0)
   #EndIf
   
   If enc<>0 Then
      *CPtr(UInteger Ptr,enc)=callocate((len1 Shl 3)+8)
      Dim As ZString Ptr ee=*CPtr(UInteger Ptr,enc)
      For i=0 To len1-1
         /'
         k=0
         While (old_list[k].ch<>s[i])
            k+=1
         Wend
         *ee+=old_list[k].enc
         '/
         k=s[i]
         te=get_huff_enc(@list[0],k)
         If te<>0 Then *ee+=te->enc
      Next i
   EndIf
   'Delete old_list
   Delete l
   Return (newh)
End Function
Function get_huff_char(hu As hu_t ptr,enc As UByte ptr,l As Integer ptr,lvl As integer) As UByte
	if (hu=0) Then return(0)
	if ((left(*CPtr(ZString Ptr,enc),lvl)=Left(hu->enc,lvl)) Or (lvl=0)) Then
		if ((hu->left1=0) And(hu->right1=0)) Then
			*l=lvl
			return(hu->ch)
		EndIf
		Dim As UByte c
		if (enc[lvl]=Asc("0")) then
			return(get_huff_char(hu->left1,enc,l,lvl+1))
		ElseIf(enc[lvl]=Asc("1")) then
			return(get_huff_char(hu->right1,enc,l,lvl+1))
		EndIf
	EndIf
	return(0)
End Function
sub huffman_decoding(txt As UByte ptr,len1 As Integer ptr,enc As UByte ptr,l As Integer,hu As hu_t ptr) 
	if ((hu=0)or(enc=0)or(l=0)or(txt=0)) Then Return
	Dim As Integer i,k,ll
	Dim As hu_t Ptr h
	i=0:k=0
	while (i<l)
		txt[k]=get_huff_char(hu,cptr(ubyte Ptr,@enc[i]),@ll,0)
		k+=1
		i+=ll
	Wend
	if (len1<>0) then *len1=k
End Sub

#Ifndef huffman_as_include
' main
Dim As ZString Ptr c
Dim As ZString*512 in
Dim As hu_t Ptr root
Dim As Integer ll
Dim As UByte Ptr b
Dim As Integer i,k

Print "Huffman Encoding ..."
Line Input "string for encoding/compressing:",in
Print "String Length=";Len(in)
root=huffman_enc_setup(@in,Len(in),@c)
Print "---------------------------------"
Print "input=";in
Print "encripted [as bits]=";*c
Print "EncLen=";Len(*c); "bits / InputLen=";Len(in) Shl 3;"bits / Compression Ration:";CInt((Len(*c)/(Len(in) Shl 3))*1000)/10;"%"
b=Callocate((Len(*c) Shr 3)+4)
k=binstr2bytes(c,Len(*c),b)
Print "# ";
For i=0 To k
   Print Hex(b[i]);" ";
Next i
Print
Kill "\c.txt"
Open "\c.txt" For Binary As #1
Put #1,1,*c,Len(*c)+1
Put #1,,*b,k
Put #1,,*CPtr(UByte Ptr,@in),Len(in)+1
Close #1
DeAllocate b
in=""
huffman_decoding(@in,@ll,c,Len(*c),root)
Print "decoded as:";in
Print "len=";ll
Print "[Tests using the same binary tree obtained from above]"
in=""
Line Input "Enter encoded text as 1s and 0s:",*c
huffman_decoding(@in,@ll,c,Len(*c),root)
Print "enc=";*c
Print "decoded as:";in
Print "len=";ll
Print

DeAllocate c
Line Input "Test to encode, reuseing the same binary tree:",in
ll=huffman_enc_reuse(root,@in,Len(in),@c)
Print "enc=";*c
Print "decoded as:";in
Print "len=";ll
Print
B=Callocate((ll Shr 3)+8)

k=binstr2bytes(c,ll,b)
Print "seen as bytes of len=";k
Print "# ";
For i=0 To k
   Print Hex(b[i]);" ";
Next i
Print
Print "back to encoded string :";
stosb1 0,c,ll+1
ll=bytes2binstr(b,(ll Shr 3)+1,c)
Print "len=";k
Print "# ";*c
 
DeAllocate b
DeAllocate c
Sleep
#EndIf
For "hello world"(88bits in length) it should
return the encoded sequence: "00100010101101110011110010101111"(32bits in length)
with a compression ratio of : 36.4%

For "Today is a nice snowing day ... Yuppy !!!" (352bits) it should
return the encoded sequence: "111111111001101110011100001010011001111101111000100100101110100100101010001111010111110011010101010110101100110111001110000001000100010000001000011011110111101110000011101110111" (177bits)
with a compression ratio of : 50.3%

Note : There can be different encoding bit sequences for the same string ! If there are items with the same frequence when they are sorted it doesn't matter the order ... but this will result in a different encoding sequence....
badidea
Posts: 2586
Joined: May 24, 2007 22:10
Location: The Netherlands

Re: simple, dirty huffman coding example

Post by badidea »

Code from 'Mindless' above updated for fbc 1.09.0 32/64-bit linux/windows

Code: Select all

' huffman compressor - decompressor For FreeBasic
' to compress huffman c myfile.txt myfile.huf
' to decompress huffman d myfile.huf myfile.txt

' updated for fbc 1.09.0 32/64-bit linux/windows

'----------------------------- simple binary tree ------------------------------
namespace bt

  type node_t
    index        as integer
    node_flags   as ubyte
    node(0 to 1) as node_t ptr
  end type
 
  const leaf_node = 0, branch_node = 1
 
  function create_leaf (index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    node_new->index = index
    node_new->node_flags = leaf_node
    return node_new
  end function
 
  function create_branch (index as integer = 0, byval node0 as node_t ptr = 0, byval node1 as node_t ptr = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    node_new->index = index
    node_new->node_flags = branch_node
    node_new->node(0) = node0
    node_new->node(1) = node1
    return node_new
  end function
 
  function destroy (byval node_given as node_t ptr) as ubyte
    if node_given->node_flags and branch_node then
      if node_given->node(0) then destroy(node_given->node(0))
      if node_given->node(1) then destroy(node_given->node(1))
    end if
    deallocate(node_given)
    return 0
  end function
 
  'size in bits?
  function calculate_size (byval node_given as node_t ptr) as uinteger
    dim as uinteger size
    if node_given->node_flags and branch_node then
      if node_given->node(0) then size += calculate_size(node_given->node(0))
      if node_given->node(1) then size += calculate_size(node_given->node(1))
    else
      size += 8 'leaf = byte value?
    end if
    size += 1 '1 bit per branch depth
    return size
  end function
 
  function save_tree (byval node_given as node_t ptr, bt_data as ubyte ptr, _
    byref bt_data_ptr as uinteger = 0, byref bt_data_bit_ptr as byte = 8) as ubyte
    '
    if bt_data_bit_ptr = 0 then
      bt_data_ptr += 1
      bt_data_bit_ptr = 8
    end if
    bt_data_bit_ptr -= 1
    if node_given->node_flags and branch_node then
      bt_data[bt_data_ptr] = bitreset(bt_data[bt_data_ptr], bt_data_bit_ptr)
      if node_given->node(0) then save_tree(node_given->node(0), bt_data, bt_data_ptr, bt_data_bit_ptr)
      if node_given->node(1) then save_tree(node_given->node(1), bt_data, bt_data_ptr, bt_data_bit_ptr)
    else 'leaf_node
      bt_data[bt_data_ptr] = bitset(bt_data[bt_data_ptr], bt_data_bit_ptr)
      dim as ubyte i
      for i = 0 to 7
        if bt_data_bit_ptr = 0 then
          bt_data_ptr += 1
          bt_data_bit_ptr = 8
        end if
        bt_data_bit_ptr -= 1
        if bit(node_given->index, 7 - i) then
          bt_data[bt_data_ptr] = bitset(bt_data[bt_data_ptr], bt_data_bit_ptr)
        else
          bt_data[bt_data_ptr] = bitreset(bt_data[bt_data_ptr], bt_data_bit_ptr)
        end if
      next
    end if
    return 0
  end function
 
  function load_tree (byval bt_data as ubyte ptr, byref bt_data_ptr as uinteger = 0, _
    byref bt_data_bit_ptr as byte = 8) as node_t ptr
    '
    dim as node_t ptr return_node
    if bt_data_bit_ptr = 0 then
      bt_data_ptr += 1
      bt_data_bit_ptr = 8
    end if
    bt_data_bit_ptr -= 1
    if bit(bt_data[bt_data_ptr], bt_data_bit_ptr) = 0 then
      return_node = create_branch(-1)
      return_node->node(0) = load_tree(bt_data, bt_data_ptr, bt_data_bit_ptr)
      return_node->node(1) = load_tree(bt_data, bt_data_ptr, bt_data_bit_ptr)
    else
      dim as uinteger index
      dim as ubyte i
      for i = 0 to 7
        if bt_data_bit_ptr = 0 then
          bt_data_ptr += 1
          bt_data_bit_ptr = 8
        end if
        bt_data_bit_ptr -= 1
        if bit(bt_data[bt_data_ptr], bt_data_bit_ptr) then
          index = bitset(index, 7 - i)
        else
          index = bitreset(index, 7 - i)
        end if
      next
      return_node = create_leaf(index)
    end if
    return return_node
  end function
 
  function dump (byval node_given as node_t ptr, indent as ubyte = 0) as ubyte
    if node_given->node_flags and branch_node then
      print string(indent, 32) & "branch: " & node_given->index
      if node_given->node(0) then dump(node_given->node(0), indent + 2)
      if node_given->node(1) then dump(node_given->node(1), indent + 2)
    else
      print string(indent, 32) & "leaf: " & node_given->index
    end if
    return 0
  end function
 
  function qtable (byval node_given as node_t ptr, path() as ulong, bits() as ubyte, byref cpath as ulong = 0, byref cbits as ubyte = 0) as ubyte
    if cbits > 31 then print "QTABLE FAILED!" : stop
    if node_given->node_flags and branch_node then
      if node_given->node(0) then qtable(node_given->node(0), path(), bits(), (cpath shl 1), cbits + 1)
      if node_given->node(1) then qtable(node_given->node(1), path(), bits(), (cpath shl 1) or 1, cbits + 1)
    else
      path(node_given->index) = cpath
      bits(node_given->index) = cbits
    end if
    return 0
  end function
 
end namespace

'----------------------------- simple linked list ------------------------------
namespace ll

  type node_t
    index      as integer
    prev_node  as node_t ptr
    next_node  as node_t ptr
    foo        as any ptr
  end type

  const as uinteger no_node = 0
 
  declare function create (index as integer = 0) as node_t ptr
  declare function destroy (byval node_given as node_t ptr) as node_t ptr
  declare function insert_after (byval node_given as node_t ptr, index as integer = 0) as node_t ptr
  declare function deleet (byval node_given as node_t ptr) as node_t ptr
  declare function nswap (byval node_given1 as node_t ptr, byval node_given2 as node_t ptr) as ubyte
  declare function seek_first (byval node_given as node_t ptr) as node_t ptr
  declare function sort_index (byval node_given as node_t ptr) as ubyte
  declare function count (byval node_given as node_t ptr) as uinteger
  declare function dump (byval node_given as node_t ptr) as ubyte
 
  function create (index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    node_new->index = index
    return node_new
  end function

  function destroy (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current = node_given
    node_current = seek_first(node_current)
    do until (node_current->next_node = no_node)
      node_current = node_current->next_node
      deallocate(node_current->prev_node)
    loop
    deallocate(node_current)
    return no_node
  end function

  function insert_after (byval node_given as node_t ptr, index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    if not(node_given->next_node = no_node) then
      node_new->next_node = node_given->next_node
      node_given->next_node->prev_node = node_new
    end if
    node_given->next_node = node_new
    node_new->prev_node = node_given
    node_new->index = index
    return node_new
  end function

  function deleet (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current
    if (node_given->prev_node = no_node) and (node_given->prev_node = no_node) then
      node_current = no_node
    elseif (node_given->prev_node = no_node) then
      node_current = node_given->next_node
      node_given->next_node->prev_node = no_node
    elseif (node_given->next_node = no_node) then
      node_current = node_given->prev_node
      node_given->prev_node->next_node = no_node
    else
      node_current = node_given->next_node
      node_given->prev_node->next_node = node_given->next_node
      node_given->next_node->prev_node = node_given->prev_node
    end if
    deallocate(node_given)
    return node_current
  end function

  function nswap (byval node_given1 as node_t ptr, byval node_given2 as node_t ptr) as ubyte
    swap node_given1->index, node_given2->index
    swap node_given1->foo, node_given2->foo
    return 0
  end function

  function seek_first (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current = node_given
    do until (node_current->prev_node = no_node)
      node_current = node_current->prev_node
    loop
    return node_current
  end function

  function sort_index (byval node_given as node_t ptr) as ubyte
    dim as node_t ptr node_current = node_given
    dim as node_t ptr node_grab, node_match
    node_current = seek_first(node_current)
    node_grab = node_current
    do until (node_grab->next_node = no_node)
      node_match = node_grab
      node_current = node_grab
      do until (node_current->next_node = no_node)
        node_current = node_current->next_node
        if (node_current->index < node_match->index) then node_match = node_current
      loop
      if (node_match <> node_grab) then nswap(node_match, node_grab)
      node_grab = node_grab->next_node
    loop
    return 0
  end function

  function count (byval node_given as node_t ptr) as uinteger
    dim as node_t ptr node_current = node_given
    dim as uinteger ncount = 1
    node_current = seek_first(node_current)
    do until (node_current->next_node = no_node)
      node_current = node_current->next_node
      ncount += 1
    loop
    return ncount
  end function

  function dump (byval node_given as node_t ptr) as ubyte
    dim as node_t ptr node_current = node_given
    node_current = seek_first(node_current)
    print "index", "foo", "previous", "next"
    do
      print hex(node_current->index, 8), hex(node_current->foo, 8),;
      if not(node_current->prev_node = no_node) then
        print hex(node_current->prev_node->index, 8), ';
      else
        print " -", ';
      end if
      if not(node_current->next_node = no_node) then
        print hex(node_current->next_node->index, 8)
      else
        print " -"
        exit do
      end if
      node_current = node_current->next_node
    loop
    return 0
  end function

end namespace

'-------------------------------- main program ---------------------------------

'File format (1-based):
' 1 ...  4 = "HUFF"
' 5 ...  8 = ts (tree size)
' 9 ... 12 = cs (compressed data size)
'13 ... 16 = ds (original data size)
'tree data
'compressed data

select case command(1)
'---------------------------------- compress -----------------------------------
case "c"
  dim as ubyte ptr ddata, cdata, tdata
  dim as ulong ds, cs, ts
  dim as uinteger di, ci 
  dim as ubyte cb = 8

  'original input file
  dim as long f = freefile
  open command(2) for binary access read as #f
  ds = lof(f) 'data size
  ddata = callocate(ds) 'raw data
  get #f, , *ddata, ds 'read all
  close #f
 
  print "  building tree..."
 
  dim as uinteger u(255)

  'histogram, count byte occurance
  for di = 0 to ds - 1
    u(ddata[di]) += 1
  next
 
  dim as ll.node_t ptr l

  'loop histogram, build linked list
  dim as uinteger i
  for i = 0 to 255
    if u(i) then '> 0?
      '~ print chr(i), u(i)
      if l then
        l = ll.insert_after(l, u(i))
      else
        l = ll.create(u(i))
      end if
      l->foo = bt.create_leaf(i)
    end if
  next

  '~ ll.dump(l)

  '~ for i as integer = 0 to 255
    '~ print u(i)
  '~ next
 
  do while ll.count(l) > 1
    ll.sort_index(l)
    l = ll.seek_first(l)
   
    l->foo = bt.create_branch(-1, l->foo, l->next_node->foo)
    l->index += l->next_node->index
    ll.deleet(l->next_node)
  loop

  dim as bt.node_t ptr b

  b = l->foo
  ll.destroy(l)
 
  ts = (bt.calculate_size(b) + 7) \ 8
  '~ print "bt.calculate_size(b): " & bt.calculate_size(b) & " bits"
  tdata = callocate(ts)

  bt.save_tree(b, tdata)

  '~ bt.dump(b)

  '~ for i as integer = 0 to ts-1
    '~ print bin(tdata[i])
  '~ next

  'write hufmann output file
  dim as long o = freefile
  open command(3) for binary access write as #o
  put #o, , "HUFF"
  put #o, , ts 'tree size
  '~ print "ts: " & ts
  seek #o, 13
  put #o, , ds
  '~ print "ds: " & ds
  put #o, , *tdata, ts 'tree data
 
  dim as ulong qtp(255)
  dim as ubyte qtb(255)
  bt.qtable(b, qtp(), qtb())
 
  print "  compressing..." ';
 
  cdata = callocate(ds)
  for di = 0 to ds - 1
    if qtb(ddata[di]) then
      for i = 0 to qtb(ddata[di]) - 1
        if cb = 0 then
          ci += 1
          cb = 8
        end if
        cb -= 1
        cdata[ci] shl= 1
        cdata[ci] or= abs(bit(qtp(ddata[di]), qtb(ddata[di]) - i - 1))
      next
    end if
    if di mod 1024 = 0 then locate , 80 - 26 : print " " & string(fix(di / ds * 20), ".") & string(20 - fix(di / ds * 20), "o") & " " & fix(di / ds * 100) & "%" ';
  next
 
  cdata[ci] shl= cb

  cs = ci + 1
  put #o, , *cdata, cs
  put #o, 9, cs
  '~ print "cs: " & cs
 
  bt.destroy(b)
 
  deallocate(ddata)
  deallocate(cdata)

  close #o 'added
  
'--------------------------------- decompress ----------------------------------
case "d"
  dim as ubyte ptr tdata, cdata, ddata
  dim as uinteger ci, di
  dim as ulong ts, cs, ds
  dim as ubyte cb = 8
  dim as string header = string(4, 0)
 
  dim as long f = freefile
  open command(2) for binary access read as #f

  get #f, , header
  if header <> "HUFF" then
    close #f
    print "invalid source file (HUFF expected)"
    end
  end if
  seek #f, 5
  get #f, , ts
  '~ print "ts: " & ts
  tdata = callocate(ts)
  get #f, , cs
  '~ print "cs: " & cs
  cdata = callocate(cs)
  get #f, , ds
  '~ print "ds: " & ds
  ddata = callocate(ds)
  get #f, , *tdata, ts
  get #f, , *cdata, cs
 
  close #f
 
  print "  reading tree..."
 
  dim as bt.node_t ptr tr = bt.load_tree(tdata), tc
  deallocate(tdata)
 
  print "  decompressing..." ';
 
  do while di < ds
    tc = tr
    do while tc->node_flags and bt.branch_node
      if cb = 0 then
        ci += 1
        cb = 8
      end if
      cb -= 1
      tc = tc->node(abs(bit(cdata[ci], cb)))
    loop
    ddata[di] = tc->index
    di += 1
    if di mod 1024 = 0 then locate , 80 - 26 : print " " & string(fix(di / ds * 20), ".") & string(20 - fix(di / ds * 20), "o") & " " & fix(di / ds * 100) & "%" ';
  loop
 
  dim as long o = freefile
  open command(3) for binary access write as #o
 
  put #o, , *ddata, ds
 
  close #o
 
  bt.destroy(tr)
 
  deallocate(cdata)
  deallocate(ddata)

'------------------------------ test, show tree --------------------------------
case "t"
  dim as ubyte ptr tdata
  dim as ulong ts, cs, ds
 
  dim as long f = freefile
  open command(2) for binary access read as #f
 
  dim as string header = string(4, 0)
  get #f, , header
  if header <> "HUFF" then
    close #f
    print "Invalid source file (HUFF expected)"
    end
  end if
  seek #f, 5
  get #f, , ts
  tdata = callocate(ts)
  get #f, , cs
  get #f, , ds
  get #f, , *tdata, ts
 
  close #f

  '~ for i as integer = 0 to ts-1
    '~ print tdata[i]
  '~ next
  dim as bt.node_t ptr tr = bt.load_tree(tdata), tc
  deallocate(tdata)
 
  bt.dump(tr)
 
  bt.destroy(tr)
 
case else
  print command(0) & " c|d src dst"
  print "c = compress, d = decompress"
 
end select
Post Reply