simple compressor LZg9

User projects written in or related to FreeBASIC.
Post Reply
Vitamin
Posts: 3
Joined: Nov 04, 2023 18:33

simple compressor LZg9

Post by Vitamin »

small byte oriented compressor Lzg9.bas:

Code: Select all

Namespace Lzg9
	
Const sizeInt = SizeOf(Integer)
#Define MemCopy(adrIn,adrOut,size) fb_memcopy(*(adrOut),*(adrIn),size)
#Define WriteCaption() Poke Long,outData,&H09477A4C : WriteVarInt(inSize)
#Define OptSize(size) (size-1 And Not(sizeInt-1)) + sizeInt

#Macro WriteVarInt(value)
	i = 1 : outValP = outData+indOut : inVal = value
	*outValP = inVal And 127 : inVal Shr= 7
	Do While inVal
		outValP[i-1] Or= 128 : outValP[i] = inVal
		inVal Shr= 7 : i += 1
	Loop
	indOut += i
#EndMacro

#Macro ReadVarInt(value)
	k = 0 : inValP = inData+indIn : value = *inValP And 127
	Do While inValP[k] > 127
		k += 1 : value Or= Cast(Integer, inValP[k] And 127) Shl k*7
	Loop
	indIn += k+1
#EndMacro

#Macro AddHash4(adrIn)
	h4 = Peek(ULong,adrIn)*41443 + (Peek(ULong,adrIn) Shr 16)
#EndMacro

#Macro AddHash42()
	hash4(h4) = counter : counter += 1
#EndMacro

#Macro WriteFound()
	If length > 4 Then
		If length < 20 Then
			outData[indOut] = 128 Or ((length-5) Shl 3) Or (distance And 7) : indOut += 1
		Else
			outData[indOut] = 248 Or (distance And 7) : indOut += 1
			WriteVarInt(length-20)
		EndIf
		distance Shr= 3
	Else
		outData[indOut] = 64 Or (distance And 63)
		distance Shr= 6 : indOut += 1
	EndIf
#EndMacro

#Macro WriteFound2()
	If length > 4 Then
		If length < 20 Then
			outData[indOut] = 128 Or ((length-5) Shl 3) Or (distance And 7)
			indOut += 1 : sizeOldFound = 1
		Else
			outData[indOut] = 248 Or (distance And 7) : indOut += 1
			WriteVarInt(length-20) : sizeOldFound = i+1
		EndIf
		distance Shr= 3
	Else
		outData[indOut] = 64 Or (distance And 63)
		distance Shr= 6 : indOut += 1 : sizeOldFound = 1
	EndIf
#EndMacro

#Macro AddFoundOfLiteral()
	Do While (inData[indFound-1] = inData[indIn-1]) And (indFound > 0) And (literal > 0)
		indFound -= 1 : indIn -= 1 : literal -= 1 : addLeft += 1 : length += 1
	Loop
	If literal Then
		WriteLiteral(inData,outData,indIn,indOut,literal,adrLit)
	Else
		i = 1
		Do While i <= dlnPred And ((inData[indFound-i] = inData[indIn-i]) And (indFound-i>0))
			i += 1
		Loop
		If i > dlnPred Then
			indIn -= dlnPred : addLeft += dlnPred
			length += dlnPred : indOut -= sizeOldFound
		EndIf
	EndIf
#EndMacro

#Macro CompareFoundBlock(lenFound)
	For i = indIn+lenFound To inLimit Step sizeInt
		If Peek(Integer,inData+i) = Peek(Integer,inData+indFound+length) Then length += sizeInt Else Exit For
	Next
#EndMacro

#Macro CompareFound()
	For i = indIn+length To indMax
		If inData[i] = inData[indFound+length] Then length += 1 Else Exit For
	Next
#EndMacro

Sub WriteLiteral(ByRef inData As UByte Ptr,ByRef outData As UByte Ptr,_
								ByRef indIn As Integer,ByRef indOut As Integer,_
								ByRef size As Integer,ByRef adrLit As Integer)
	If size < 49 Then
		If size > 1 Then
			outData[indOut] = size-2 : indOut += 1
		Else
			If adrLit Then
				outData[adrLit] Or= inData[indIn-1] Shr 4
				outData[indOut] = 15 And inData[indIn-1] Or 48 : adrLit = 0
			Else
				outData[indOut] = 48 : adrLit = indOut
				indOut += 1 : outData[indOut] = inData[indIn-1]
			EndIf
			size = 0 : indOut += 1 : Exit Sub
		EndIf
	Else
		Dim As UByte Ptr outValP : Dim As Integer inVal, i
		outData[indOut] = 47 : indOut += 1
		WriteVarInt(size-49)
	EndIf
	MemCopy(@inData[indIn-size],@outData[indOut],OptSize(size))
	indOut += size : size = 0
End Sub

Function Compr1(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	#Macro CreatingHash(adrIn)
		h4 = (Peek(ULong,adrIn) Shl 4) + (Peek(ULong,adrIn) Shr 16)
	#EndMacro
	Dim As Integer literal,length,inLimit,i,inVal,indIn,adrLit
	Dim As Integer indFound,distance,indOut=4,indMax=inSize-1
	Dim As ULong counter,hash4(65535)
	Dim As UShort h4
	Dim As UByte Ptr outValP
	WriteCaption()
	If inSize > 3 Then
		inLimit = inSize-8 : CreatingHash(inData) : hash4(h4) = indIn
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		CreatingHash(inData+indIn) : indFound = hash4(h4)
		If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
			length = 4 : distance = indIn-indFound : CompareFoundBlock(4) : CompareFound()
		EndIf
		hash4(h4) = indIn
		If length Then
			Do While (inData[indFound-1] = inData[indIn-1]) And (indFound > 0) And (literal > 0)
				indFound -= 1 : indIn -= 1 : literal -= 1 : length += 1
			Loop
			If literal Then
				WriteLiteral(inData,outData,indIn,indOut,literal,adrLit)
			EndIf
			WriteFound() : WriteVarInt(distance) : indIn += length : length = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr2(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	Dim As Integer literal,length,inLimit,indIn,indMax=inSize-1
	Dim As Integer i,inVal,indFound,addLeft,adrLit,distance,counter,indOut=4
	Dim As UShort h4
	Dim As ULong hash4(65535)
	Dim As UByte Ptr outValP : WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : AddHash4(inData) : AddHash42()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		AddHash4(inData+indIn) : indFound = hash4(h4)
		If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
			length = 4 : distance = indIn-indFound : CompareFoundBlock(4) : CompareFound()
		EndIf
		AddHash42()
		If length Then
			Do While (inData[indFound-1] = inData[indIn-1]) And (indFound > 0) And (literal > 0)
				indFound -= 1 : indIn -= 1 : literal -= 1 : addLeft += 1 : length += 1
			Loop
			If literal Then
				WriteLiteral(inData,outData,indIn,indOut,literal,adrLit)
			EndIf
			WriteFound() : WriteVarInt(distance)
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					AddHash4(outValP) : AddHash42()
				Next
			EndIf
			indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr3(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	Dim As Integer i,indFound,literal,length,inLimit,indIn,inVal,indMax=inSize-1
	Dim As Integer addLeft,adrLit,distance,counter,sizeOldFound,dlnPred,indOut=4
	Dim As UShort h4 : Dim As ULong hash4(65535)
	Dim As UByte Ptr outValP : WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : AddHash4(inData) : AddHash42()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		AddHash4(inData+indIn) : indFound = hash4(h4)
		If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
			length = 4 : distance = indIn-indFound : CompareFoundBlock(4) : CompareFound()
		EndIf
		AddHash42()
		If length Then
			AddFoundOfLiteral() : WriteFound2()
			WriteVarInt(distance) : sizeOldFound += i
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					AddHash4(outValP) : AddHash42()
				Next
			EndIf
			dlnPred = length : indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr4(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	#Macro AddHash()
		hash4(h4) = counter : hash8(h8) = counter : counter += 1
	#EndMacro
	#Macro CreatingHash(adrIn)
		h4 = Peek(ULong,adrIn)*41443 + (Peek(ULong,adrIn) Shr 16)
		h8 = Peek(ULong,adrIn+4)*67777 + (Peek(ULong,adrIn+4) Shr 16) + h4
	#EndMacro
	Dim As Integer i,indFound,literal,length,inLimit,indIn,inVal,indMax=inSize-1
	Dim As Integer addLeft,adrLit,distance,counter,sizeOldFound,dlnPred,indOut=4
	Dim As UShort h4,h8 : Dim As ULong hash4(65535),hash8(65535)
	Dim As UByte Ptr outValP : WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : CreatingHash(inData) : AddHash()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		CreatingHash(inData+indIn) : indFound = hash8(h8)
		If Peek(LongInt,inData+indFound) = Peek(LongInt,inData+indIn) Then
			length = 8 : distance = indIn-indFound : CompareFoundBlock(8)
		Else
			indFound = hash4(h4)
			If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
				length = 4 : distance = indIn-indFound
			EndIf
		EndIf
		AddHash()
		If length Then
			CompareFound() : AddFoundOfLiteral() : WriteFound2()
			WriteVarInt(distance) : sizeOldFound += i
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					CreatingHash(outValP) : AddHash()
				Next
			EndIf
			dlnPred = length : indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr5(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	#Macro AddHash()
		hash4(h4) = counter : hash8(h8) = counter : counter += 1
	#EndMacro
	#Macro CreatingHash(adrIn)
		h4 = Peek(ULong,adrIn)*41443 + (Peek(ULong,adrIn) Shr 16)
		h8 = (Peek(ULong,adrIn+4)*67777 + (Peek(ULong,adrIn+4) Shr 16) + h4) And bitMsk
	#EndMacro
	Dim As Integer i,indFound,literal,length,inLimit,indIn,inVal,indMax=inSize-1
	Dim As Integer addLeft,adrLit,distance,counter,sizeOldFound,dlnPred,indOut=4,bitMsk=17
	Do While (1 shl (bitMsk+4)) < inSize : bitMsk += 1 : Loop
	bitMsk = (1 shl bitMsk) - 1
	Dim As UShort h4 : Dim As ULong h8,hash4(65535)
	Dim As UByte Ptr outValP : ReDim As ULong hash8(bitMsk) : WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : CreatingHash(inData) : AddHash()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		CreatingHash(inData+indIn) : indFound = hash8(h8)
		If Peek(LongInt,inData+indFound) = Peek(LongInt,inData+indIn) Then
			length = 8 : distance = indIn-indFound : CompareFoundBlock(8)
		Else
			indFound = hash4(h4)
			If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
				length = 4 : distance = indIn-indFound
			EndIf
		EndIf
		AddHash()
		If length Then
			CompareFound() : AddFoundOfLiteral() : WriteFound2()
			WriteVarInt(distance) : sizeOldFound += i
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					CreatingHash(outValP) : AddHash()
				Next
			EndIf
			dlnPred = length : indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr6(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	#Macro AddHash()
		hash4(h4) = counter : hash6(h6) = counter : hash8(h8) = counter : counter += 1
	#EndMacro
	#Macro CreatingHash(adrIn)
		h4 = Peek(ULong,adrIn)*41443 + (Peek(ULong,adrIn) Shr 16)
		h6 = (Peek(ULong,adrIn+2)*67777 + h4) And bitMsk
		h8 = (Peek(ULong,adrIn+4)*67777 + (Peek(ULong,adrIn+4) Shr 16) + h4) And bitMsk
	#EndMacro
	Dim As Integer i,indFound,literal,length,inLimit,indIn,inVal,indMax=inSize-1
	Dim As Integer addLeft,adrLit,distance,counter,sizeOldFound,dlnPred,indOut=4,bitMsk=17
	Do While (1 shl (bitMsk+4)) < inSize : bitMsk += 1 : Loop
	bitMsk = (1 shl bitMsk) - 1
	Dim As UByte Ptr outValP : Dim As UShort h4 : Dim As ULong h6,h8,hash4(65535)
	ReDim As ULong hash6(bitMsk),hash8(bitMsk) : WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : CreatingHash(inData) : AddHash()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		CreatingHash(inData+indIn)
		If literal < 192 Or (literal And 3) = 0 Then
			indFound = hash8(h8)
			If Peek(LongInt,inData+indFound) = Peek(LongInt,inData+indIn) Then
				length = 8 : distance = indIn-indFound : CompareFoundBlock(8)
			Else
				#Define msk6B &H0000ffffffffffff
				indFound = hash6(h6)
				If ((Peek(LongInt,inData+indFound) Eqv Peek(LongInt,inData+indIn)) And msk6B) = msk6B Then
					length = 6 : distance = indIn-indFound : CompareFoundBlock(6)
				Else
					indFound = hash4(h4)
					If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
						length = 4 : distance = indIn-indFound
					EndIf
				EndIf
			EndIf
		EndIf
		AddHash()
		If length Then
			CompareFound() : AddFoundOfLiteral() : WriteFound2()
			WriteVarInt(distance) : sizeOldFound += i
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					CreatingHash(outValP) : AddHash()
				Next
			EndIf
			dlnPred = length : indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function RaCompr(inData As UByte Ptr,inSize As Integer,_
										outData As UByte Ptr) As Integer
	Dim As Integer literal,sovpad,distance,indOut,k,indIn
	Dim As UByte Ptr inValP : Dim As UByte bufProp,bufFl,komBait
	Do
		komBait = inData[indIn] : indIn += 1
		If komBait > 63 Then
			If komBait > 127 Then
				sovpad = ((komBait Shr 3) And 15)
				If sovpad < 15 Then
					sovpad += 5
				Else
					ReadVarInt(sovpad) : sovpad += 20
				EndIf
				ReadVarInt(distance) : distance = (distance Shl 3) Or (komBait And 7)
			Else
				sovpad = 4 : ReadVarInt(distance)
				distance = (distance Shl 6) Or (komBait And 63)
			EndIf
			If sovpad > distance Then
				If distance >= 4 Then
					For inValP = outData+indOut To outData+indOut+sovpad-1 Step 4
						Poke Long, inValP, Peek(Long,inValP-distance)
					Next
				ElseIf distance < 3 Then
					k = 4-distance
					For inValP = outData+indOut To outData+indOut+k-1
						*inValP = *(inValP-distance)
					Next
					For inValP = outData+indOut+k To outData+indOut+sovpad-1 Step 4
						Poke Long, inValP, Peek(Long,inValP-4)
					Next
				Else
					For inValP = outData+indOut To outData+indOut+sovpad-1 Step 3
						Poke Long, inValP, Peek(Long,inValP-3)
					Next
				EndIf
			Else
				MemCopy(outData+indOut-distance,outData+indOut,OptSize(sovpad))
			EndIf
			indOut += sovpad
		Else
			If komBait < 48 Then
				If komBait <> 47 Then
					literal = komBait + 2
				Else
					ReadVarInt(literal) : literal += 49
				EndIf
				MemCopy(inData+indIn,outData+indOut,OptSize(literal))
				indIn += literal : indOut += literal
			Else
				If bufFl Then
					outData[indOut] = bufProp Or (komBait And 15) : bufFl = 0
				Else
					bufProp = komBait Shl 4 : bufFl = 1
					outData[indOut] = inData[indIn] : indIn += 1
				EndIf
				indOut += 1
			EndIf
		EndIf
	Loop While indIn < inSize
	Return 1
End Function

Function ZapZglvk0(outData As UByte Ptr, inSize As Integer) As Integer
	Dim As UByte Ptr outValP : Dim As Integer i,inVal,indOut=4
	WriteCaption()
	If inSize < 49 Then
		outData[indOut] = IIf(inSize > 1,inSize-2,48)
		indOut += 1
	Else
		outData[indOut] = 47 : indOut += 1
		WriteVarInt(inSize-49)
	EndIf
	Return indOut
End Function

Function Compr0(zglvk As UByte Ptr,zglvkRzm As Integer,inData As UByte Ptr,_
								inSize As Integer,outData As UByte Ptr) As Integer
	MemCopy(zglvk,outData,zglvkRzm)
	MemCopy(inData,outData+zglvkRzm,inSize)
	Return inSize + zglvkRzm
End Function

#Macro Dekodmethod()
	Var Compr = @Compr4
	Select Case method
		Case 1 : Compr = @Compr1
		Case 2 : Compr = @Compr2
		Case 3 : Compr = @Compr3
		Case 5 : Compr = @Compr5
		Case 6 : Compr = @Compr6
	End Select
#EndMacro
Type TipaStr
  As ZString Ptr data1
  As Integer len1, size1
End Type
Function Compress OverLoad (vhSt As String,	method As Integer=4) As String
	If Len(vhSt) = 0 Then Return ""
	If method Then
		Dim As UByte Ptr outData = Allocate(Len(vhSt)*1.04+16)
		If outData = 0 Then Return ""
		Dekodmethod() : Var vihMasRazm = Compr(SAdd(vhSt),Len(vhSt),outData)
		Dim As TipaStr sTip = Type(Cast(ZString Ptr,outData),vihMasRazm,vihMasRazm+1)
	  Function = *Cast(String Ptr, @sTip)
	  DeAllocate outData
	Else
		Dim As UByte zglvk(20) : Var rzZg = ZapZglvk0(@zglvk(0),Len(vhSt))
		Var rz = Len(vhSt) + rzZg, vihSt = Space(rz)
		If Len(vihSt) <> rz Then Return ""
		Compr0(@zglvk(0),rzZg,SAdd(vhSt),Len(vhSt),SAdd(vihSt)) : Return vihSt
	EndIf
End Function

Function Compress(inData As UByte Ptr,inSize As Integer,_
									ByRef outData As UByte Ptr,method As Integer=4) As Integer
	If inData = 0 Or inSize = 0 Then Return -1
	If method Then
		outData = Allocate(inSize*1.04+16)
		If outData = 0 Then Return 0
		Dekodmethod() : Return Compr(inData,inSize,outData)
	Else
		Dim As UByte zglvk(20) : Var rZg = ZapZglvk0(@zglvk(0),inSize)
		outData = Allocate(inSize + rZg)
		If outData = 0 Then Return 0
		Return Compr0(@zglvk(0),rZg,inData,inSize,outData)
	EndIf
End Function

Function HtZagolovok(inData As UByte Ptr, ByRef indIn As Integer) As Integer
	If Peek(Long,inData) <> &H09477A4C Then Return 0
	Dim As UByte Ptr inValP : Dim As Integer vihMasRazm, k
	ReadVarInt(vihMasRazm) : Return vihMasRazm
End Function

Function Decompress OverLoad (vhSt As String) As String
	If Len(vhSt) < 7 Then Return ""
	Dim As UByte Ptr inData = SAdd(vhSt) : Var indIn = 4
	Var vihMasRazm = HtZagolovok(inData,indIn)
	If vihMasRazm = 0 Then Return ""
	Dim As UByte Ptr outData = Allocate(vihMasRazm+8)
	If outData = 0 Then Return ""
	If RaCompr(inData+indIn,Len(vhSt)-indIn,outData) Then
		Dim As TipaStr sTip = Type(Cast(ZString Ptr,outData),vihMasRazm,vihMasRazm+1)
	  Function = *Cast(String Ptr, @sTip)
	EndIf
  DeAllocate outData
End Function

Function Decompress(inData As UByte Ptr,inSize As Integer,_
												ByRef outData As UByte Ptr) As Integer
	If inData = 0 Or inSize < 7 Then Return -1
	Var indIn = 4, vihMasRazm = HtZagolovok(inData,indIn)
	If vihMasRazm = 0 Then Return 0
	outData = Allocate(vihMasRazm+8)
	If outData = 0 Then Return 0
	If RaCompr(inData+indIn,inSize-indIn,outData) Then Return vihMasRazm
End Function

End Namespace
for testing Test_lzg9_1.bas:

Code: Select all

#Include "lzg9.bas"

Using Lzg9

Dim As String s3, s2, s = "abracadabra abracadabra abracadabra abracadabra"

? : ?"  Compress = ";
s2 = Compress(s) : ? "OK": ?

? "  Decompress = ";
s3 = Decompress(s2) : ? "OK"

? : ? " size_in = "; Len(s)
? " size_out = "; Len(s2)
? : ? " Compare = "; IIf(s = s3, "Y", "N")

Sleep
Test_lzg9_2.bas:

Code: Select all

#Include "file.bi"
#Include "lzg9.bas" 

Var file = ExePath & "\lzg9.bas"
Var ff = FreeFile, method = 4 '	method = 0..6(max)
Dim As ULong i, sizeIn, sizeCompr, sizeIn2
Dim As UByte Ptr p1,p2,p3

If FileExists(file) = 0 Then ? "No File for Compress!" : Sleep : End
Open file For Binary Access Read As ff
sizeIn = Lof(ff)
If sizeIn Then
	p1 = Allocate(sizeIn)
	Get #ff,,*p1,sizeIn
Else
	? "No Data for Compress!" : Sleep : End
EndIf
Close ff

Var tm = Timer
? : ? " Compress = ";
sizeCompr = Lzg9.Compress(p1, sizeIn, p3, method)
? Using "##.### sek";(Timer-tm) : tm = Timer

? " Decompress = ";
sizeIn2 = Lzg9.Decompress(p3, sizeCompr, p2)
? Using "##.### sek"; (Timer-tm)
? : ? " sizeIn = "; sizeIn
? " sizeCompr = "; sizeCompr

? : ? " Compare = ";
If sizeIn = sizeIn2 Then
	For i = 0 To sizeIn-1
		If p1[i] <> p2[i] Then Exit For
	Next
EndIf
If i = sizeIn Then ? "Y" Else ? "N"

ff = FreeFile
Open file & ".lzg" For Binary Access write Lock write As ff
Put #ff,,*p3,sizeCompr
Close ff

DeAllocate p1
DeAllocate p2
DeAllocate p3

Sleep 
UEZ
Posts: 988
Joined: May 05, 2017 19:59
Location: Germany

Re: simple compressor LZg9

Post by UEZ »

Very good work and the compression result is good!

Thanks for sharing it.
Vitamin
Posts: 3
Joined: Nov 04, 2023 18:33

Re: simple compressor LZg9

Post by Vitamin »

UEZ wrote: Nov 05, 2023 10:56 Very good work and the compression result is good!
The algorithm does not limit the window size, so it can compress better than LZ4.
Corrected the translation of some variable names. Lzg9.bas:

Code: Select all

'  ///////////////////////////////////////////////////////////////////////
' ///                                                                   ///
' ///   Compressor Lzg9 version 17                      © Vitamin       ///
' ///                                                                   ///
'  //////////////////////////////////////////////////////////////////////

Namespace Lzg9
	
Const sizeInt = SizeOf(Integer)
#Define MemCopy(adrIn,adrOut,size) fb_memcopy(*(adrOut),*(adrIn),size)
#Define WriteCaption() Poke Long,outData,&H09477A4C : WriteVarInt(inSize)
#Define OptSize(size) (size-1 And Not(sizeInt-1)) + sizeInt

#Macro WriteVarInt(value)
	i = 1 : inVal = value
	outValP = outData+indOut
	*outValP = inVal And 127 : inVal Shr= 7
	Do While inVal
		outValP[i-1] Or= 128 : outValP[i] = inVal
		inVal Shr= 7 : i += 1
	Loop
	indOut += i
#EndMacro

#Macro ReadVarInt(value)
	k = 0 : inValP = inData+indIn
	value = *inValP And 127
	Do While inValP[k] > 127
		k += 1 : value Or= Cast(Integer, inValP[k] And 127) Shl k*7
	Loop
	indIn += k+1
#EndMacro

#Macro AddHash4(adrIn)
	h4 = Peek(ULong,adrIn)*41443 + (Peek(ULong,adrIn) Shr 16)
#EndMacro

#Macro AddHash42()
	hash4(h4) = counter : counter += 1
#EndMacro

#Macro WriteFound()
	If length > 4 Then
		If length < 20 Then
			outData[indOut] = 128 Or ((length-5) Shl 3) Or (distance And 7) : indOut += 1
		Else
			outData[indOut] = 248 Or (distance And 7) : indOut += 1
			WriteVarInt(length-20)
		EndIf
		distance Shr= 3
	Else
		outData[indOut] = 64 Or (distance And 63)
		distance Shr= 6 : indOut += 1
	EndIf
#EndMacro

#Macro WriteFound2()
	If length > 4 Then
		If length < 20 Then
			outData[indOut] = 128 Or ((length-5) Shl 3) Or (distance And 7)
			indOut += 1 : sizeOldFound = 1
		Else
			outData[indOut] = 248 Or (distance And 7) : indOut += 1
			WriteVarInt(length-20) : sizeOldFound = i+1
		EndIf
		distance Shr= 3
	Else
		outData[indOut] = 64 Or (distance And 63)
		distance Shr= 6
		indOut += 1 : sizeOldFound = 1
	EndIf
#EndMacro

#Macro AddFoundOfLiteral()
	Do While (inData[indFound-1] = inData[indIn-1]) And (indFound > 0) And (literal > 0)
		indFound -= 1 : indIn -= 1 : literal -= 1 : addLeft += 1 : length += 1
	Loop
	If literal Then
		WriteLiteral(inData,outData,indIn,indOut,literal,adrLit)
	Else
		i = 1
		Do While i <= lenOld And ((inData[indFound-i] = inData[indIn-i]) And (indFound-i>0))
			i += 1
		Loop
		If i > lenOld Then
			indIn -= lenOld : addLeft += lenOld
			length += lenOld : indOut -= sizeOldFound
		EndIf
	EndIf
#EndMacro

#Macro CompareFoundBlock(lenFound)
	For i = indIn+lenFound To inLimit Step sizeInt
		If Peek(Integer,inData+i) = Peek(Integer,inData+indFound+length) Then length += sizeInt Else Exit For
	Next
#EndMacro

#Macro CompareFound()
	For i = indIn+length To indMax
		If inData[i] = inData[indFound+length] Then length += 1 Else Exit For
	Next
#EndMacro

Sub WriteLiteral(ByRef inData As UByte Ptr,ByRef outData As UByte Ptr,_
								ByRef indIn As Integer,ByRef indOut As Integer,_
								ByRef size As Integer,ByRef adrLit As Integer)
	If size < 49 Then
		If size > 1 Then
			outData[indOut] = size-2 : indOut += 1
		Else
			If adrLit Then
				outData[adrLit] Or= inData[indIn-1] Shr 4
				outData[indOut] = 15 And inData[indIn-1] Or 48 : adrLit = 0
			Else
				outData[indOut] = 48 : adrLit = indOut
				indOut += 1 : outData[indOut] = inData[indIn-1]
			EndIf
			size = 0 : indOut += 1 : Exit Sub
		EndIf
	Else
		Dim As UByte Ptr outValP : Dim As Integer inVal, i
		outData[indOut] = 47 : indOut += 1
		WriteVarInt(size-49)
	EndIf
	MemCopy(@inData[indIn-size],@outData[indOut],OptSize(size))
	indOut += size : size = 0
End Sub

Function Compr1(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	#Macro CreatingHash(adrIn)
		h4 = (Peek(ULong,adrIn) Shl 4) + (Peek(ULong,adrIn) Shr 16)
	#EndMacro
	Dim As Integer literal,length,inLimit,i,inVal,indIn,adrLit
	Dim As Integer indFound,distance,indOut=4,indMax=inSize-1
	Dim As ULong counter,hash4(65535)
	Dim As UByte Ptr outValP
	Dim As UShort h4	
	WriteCaption()
	If inSize > 3 Then
		inLimit = inSize-8 : CreatingHash(inData) : hash4(h4) = indIn
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		CreatingHash(inData+indIn) : indFound = hash4(h4)
		If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
			length = 4 : distance = indIn-indFound : CompareFoundBlock(4) : CompareFound()
		EndIf
		hash4(h4) = indIn
		If length Then
			Do While (inData[indFound-1] = inData[indIn-1]) And (indFound > 0) And (literal > 0)
				indFound -= 1 : indIn -= 1 : literal -= 1 : length += 1
			Loop
			If literal Then
				WriteLiteral(inData,outData,indIn,indOut,literal,adrLit)
			EndIf
			WriteFound() : WriteVarInt(distance) : indIn += length : length = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr2(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	Dim As Integer literal,length,inLimit,indIn,indMax=inSize-1
	Dim As Integer i,inVal,indFound,addLeft,adrLit,distance,counter,indOut=4
	Dim As ULong hash4(65535)
	Dim As UByte Ptr outValP
	Dim As UShort h4	
	WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : AddHash4(inData) : AddHash42()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		AddHash4(inData+indIn) : indFound = hash4(h4)
		If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
			length = 4 : distance = indIn-indFound : CompareFoundBlock(4) : CompareFound()
		EndIf
		AddHash42()
		If length Then
			Do While (inData[indFound-1] = inData[indIn-1]) And (indFound > 0) And (literal > 0)
				indFound -= 1 : indIn -= 1 : literal -= 1 : addLeft += 1 : length += 1
			Loop
			If literal Then
				WriteLiteral(inData,outData,indIn,indOut,literal,adrLit)
			EndIf
			WriteFound() : WriteVarInt(distance)
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					AddHash4(outValP) : AddHash42()
				Next
			EndIf
			indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr3(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	Dim As Integer i,indFound,literal,length,inLimit,indIn,inVal,indMax=inSize-1
	Dim As Integer addLeft,adrLit,distance,counter,sizeOldFound,lenOld,indOut=4
	Dim As ULong hash4(65535)
	Dim As UByte Ptr outValP
	Dim As UShort h4	
	WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : AddHash4(inData) : AddHash42()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		AddHash4(inData+indIn) : indFound = hash4(h4)
		If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
			length = 4 : distance = indIn-indFound : CompareFoundBlock(4) : CompareFound()
		EndIf
		AddHash42()
		If length Then
			AddFoundOfLiteral() : WriteFound2()
			WriteVarInt(distance) : sizeOldFound += i
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					AddHash4(outValP) : AddHash42()
				Next
			EndIf
			lenOld = length : indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr4(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	#Macro AddHash()
		hash4(h4) = counter : hash8(h8) = counter : counter += 1
	#EndMacro
	#Macro CreatingHash(adrIn)
		h4 = Peek(ULong,adrIn)*41443 + (Peek(ULong,adrIn) Shr 16)
		h8 = Peek(ULong,adrIn+4)*67777 + (Peek(ULong,adrIn+4) Shr 16) + h4
	#EndMacro
	Dim As Integer i,indFound,literal,length,inLimit,indIn,inVal,indMax=inSize-1
	Dim As Integer addLeft,adrLit,distance,counter,sizeOldFound,lenOld,indOut=4
	Dim As ULong hash4(65535),hash8(65535)
	Dim As UByte Ptr outValP
	Dim As UShort h4,h8		
	WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : CreatingHash(inData) : AddHash()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		CreatingHash(inData+indIn) : indFound = hash8(h8)
		If Peek(LongInt,inData+indFound) = Peek(LongInt,inData+indIn) Then
			length = 8 : distance = indIn-indFound : CompareFoundBlock(8)
		Else
			indFound = hash4(h4)
			If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
				length = 4 : distance = indIn-indFound
			EndIf
		EndIf
		AddHash()
		If length Then
			CompareFound() : AddFoundOfLiteral() : WriteFound2()
			WriteVarInt(distance) : sizeOldFound += i
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					CreatingHash(outValP) : AddHash()
				Next
			EndIf
			lenOld = length : indIn += length
			length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr5(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	#Macro AddHash()
		hash4(h4) = counter : hash8(h8) = counter : counter += 1
	#EndMacro
	#Macro CreatingHash(adrIn)
		h4 = Peek(ULong,adrIn)*41443 + (Peek(ULong,adrIn) Shr 16)
		h8 = (Peek(ULong,adrIn+4)*67777 + (Peek(ULong,adrIn+4) Shr 16) + h4) And bitMsk
	#EndMacro
	Dim As Integer i,indFound,literal,length,inLimit,indIn,inVal,indMax=inSize-1
	Dim As Integer addLeft,adrLit,distance,counter,sizeOldFound,lenOld,indOut=4,bitMsk=17
	Do While (1 shl (bitMsk+4)) < inSize : bitMsk += 1 : Loop
	bitMsk = (1 shl bitMsk) - 1
	Dim As UShort h4 : Dim As ULong h8,hash4(65535)
	Dim As UByte Ptr outValP : ReDim As ULong hash8(bitMsk) : WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : CreatingHash(inData) : AddHash()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		CreatingHash(inData+indIn) : indFound = hash8(h8)
		If Peek(LongInt,inData+indFound) = Peek(LongInt,inData+indIn) Then
			length = 8 : distance = indIn-indFound
			CompareFoundBlock(8)
		Else
			indFound = hash4(h4)
			If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
				length = 4 : distance = indIn-indFound
			EndIf
		EndIf
		AddHash()
		If length Then
			CompareFound() : AddFoundOfLiteral() : WriteFound2()
			WriteVarInt(distance) : sizeOldFound += i
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					CreatingHash(outValP) : AddHash()
				Next
			EndIf
			lenOld = length : indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Compr6(inData As UByte Ptr,inSize As Integer,_
									outData As UByte Ptr) As Integer
	#Macro AddHash()
		hash4(h4) = counter : hash6(h6) = counter : hash8(h8) = counter : counter += 1
	#EndMacro
	#Macro CreatingHash(adrIn)
		h4 = Peek(ULong,adrIn)*41443 + (Peek(ULong,adrIn) Shr 16)
		h6 = (Peek(ULong,adrIn+2)*67777 + h4) And bitMsk
		h8 = (Peek(ULong,adrIn+4)*67777 + (Peek(ULong,adrIn+4) Shr 16) + h4) And bitMsk
	#EndMacro
	Dim As Integer i,indFound,literal,length,inLimit,indIn,inVal,indMax=inSize-1
	Dim As Integer addLeft,adrLit,distance,counter,sizeOldFound,lenOld,indOut=4,bitMsk=17
	Do While (1 shl (bitMsk+4)) < inSize : bitMsk += 1 : Loop
	bitMsk = (1 shl bitMsk) - 1
	Dim As UByte Ptr outValP : Dim As UShort h4 : Dim As ULong h6,h8,hash4(65535)
	ReDim As ULong hash6(bitMsk),hash8(bitMsk) : WriteCaption()
	If inSize > 7 Then
		inLimit = inSize-8 : CreatingHash(inData) : AddHash()
	EndIf
	literal = 1 : indIn = 1
	Do While indIn < inLimit
		CreatingHash(inData+indIn)
		If literal < 192 Or (literal And 3) = 0 Then
			indFound = hash8(h8)
			If Peek(LongInt,inData+indFound) = Peek(LongInt,inData+indIn) Then
				length = 8 : distance = indIn-indFound : CompareFoundBlock(8)
			Else
				#Define msk6B &H0000ffffffffffff
				indFound = hash6(h6)
				If ((Peek(LongInt,inData+indFound) Eqv Peek(LongInt,inData+indIn)) And msk6B) = msk6B Then
					length = 6 : distance = indIn-indFound : CompareFoundBlock(6)
				Else
					indFound = hash4(h4)
					If Peek(Long,inData+indFound) = Peek(Long,inData+indIn) Then
						length = 4 : distance = indIn-indFound
					EndIf
				EndIf
			EndIf
		EndIf
		AddHash()
		If length Then
			CompareFound() : AddFoundOfLiteral() : WriteFound2()
			WriteVarInt(distance) : sizeOldFound += i
			If indIn+length < inLimit Then
				For outValP = inData+indIn+addLeft+1 To inData+indIn+length-1
					CreatingHash(outValP) : AddHash()
				Next
			EndIf
			lenOld = length : indIn += length : length = 0 : addLeft = 0
		Else
			literal += 1 : indIn += 1
		EndIf
	Loop
	literal += inSize-indIn
	If literal Then
		WriteLiteral(inData,outData,inSize,indOut,literal,adrLit)
	EndIf
	Return indOut
End Function

Function Decmpr(inData As UByte Ptr,inSize As Integer,_
										outData As UByte Ptr) As Integer
	Dim As Integer literal,found,distance,indOut,k,indIn
	Dim As UByte bufLiteral,bufFl,token
	Dim As UByte Ptr inValP	
	Do
		token = inData[indIn] : indIn += 1
		If token > 63 Then
			If token > 127 Then
				found = ((token Shr 3) And 15)
				If found < 15 Then
					found += 5
				Else
					ReadVarInt(found) : found += 20
				EndIf
				ReadVarInt(distance)
				distance = (distance Shl 3) Or (token And 7)
			Else
				found = 4 : ReadVarInt(distance)
				distance = (distance Shl 6) Or (token And 63)
			EndIf
			If found > distance Then
				If distance >= 4 Then
					For inValP = outData+indOut To outData+indOut+found-1 Step 4
						Poke Long, inValP, Peek(Long,inValP-distance)
					Next
				ElseIf distance < 3 Then
					k = 4-distance
					For inValP = outData+indOut To outData+indOut+k-1
						*inValP = *(inValP-distance)
					Next
					For inValP = outData+indOut+k To outData+indOut+found-1 Step 4
						Poke Long, inValP, Peek(Long,inValP-4)
					Next
				Else
					For inValP = outData+indOut To outData+indOut+found-1 Step 3
						Poke Long, inValP, Peek(Long,inValP-3)
					Next
				EndIf
			Else
				MemCopy(outData+indOut-distance,outData+indOut,OptSize(found))
			EndIf
			indOut += found
		Else
			If token < 48 Then
				If token <> 47 Then
					literal = token + 2
				Else
					ReadVarInt(literal) : literal += 49
				EndIf
				MemCopy(inData+indIn,outData+indOut,OptSize(literal))
				indIn += literal : indOut += literal
			Else
				If bufFl Then
					outData[indOut] = bufLiteral Or (token And 15) : bufFl = 0
				Else
					bufLiteral = token Shl 4 : bufFl = 1
					outData[indOut] = inData[indIn] : indIn += 1
				EndIf
				indOut += 1
			EndIf
		EndIf
	Loop While indIn < inSize
	Return 1
End Function

Function CaptionSize(outData As UByte Ptr, inSize As Integer) As Integer
	Dim As UByte Ptr outValP
	Dim As Integer i,inVal,indOut=4
	WriteCaption()
	If inSize < 49 Then
		outData[indOut] = IIf(inSize > 1,inSize-2,48)
		indOut += 1
	Else
		outData[indOut] = 47 : indOut += 1
		WriteVarInt(inSize-49)
	EndIf
	Return indOut
End Function

Function Compr0(cptn As UByte Ptr,captSize As Integer,inData As UByte Ptr,_
								inSize As Integer,outData As UByte Ptr) As Integer
	MemCopy(cptn,outData,captSize)
	MemCopy(inData,outData+captSize,inSize)
	Return inSize + captSize
End Function

#Macro Dekodmethod()
	Var Compr = @Compr4
	Select Case method
		Case 1 : Compr = @Compr1
		Case 2 : Compr = @Compr2
		Case 3 : Compr = @Compr3
		Case 5 : Compr = @Compr5
		Case 6 : Compr = @Compr6
	End Select
#EndMacro

Type TypeStr
  As ZString Ptr data1
  As Integer len1, size1
End Type

Function Compress OverLoad (inSt As String,	method As Integer=4) As String
	If Len(inSt) = 0 Then Return ""
	If method Then
		Dim As UByte Ptr outData = Allocate(Len(inSt)*1.04+16)
		If outData = 0 Then Return ""
		Dekodmethod() : Var outDataSize = Compr(SAdd(inSt),Len(inSt),outData)
		Dim As TypeStr sTp = Type(Cast(ZString Ptr,outData),outDataSize,outDataSize+1)
	  Function = *Cast(String Ptr, @sTp)
	  DeAllocate outData
	Else
		Dim As UByte cptn(20)
		Var captSize = CaptionSize(@cptn(0),Len(inSt))
		Var size = Len(inSt) + captSize, outSt = Space(size)
		If Len(outSt) <> size Then Return ""
		Compr0(@cptn(0),captSize,SAdd(inSt),Len(inSt),SAdd(outSt)) : Return outSt
	EndIf
End Function

Function Compress(inData As UByte Ptr,inSize As Integer,_
									ByRef outData As UByte Ptr,method As Integer=4) As Integer
	If inData = 0 Or inSize = 0 Then Return -1
	If method Then
		outData = Allocate(inSize*1.04+16)
		If outData = 0 Then Return 0
		Dekodmethod() : Return Compr(inData,inSize,outData)
	Else
		Dim As UByte cptn(20)
		Var rZg = CaptionSize(@cptn(0),inSize)
		outData = Allocate(inSize + rZg)
		If outData = 0 Then Return 0
		Return Compr0(@cptn(0),rZg,inData,inSize,outData)
	EndIf
End Function

Function ReadCaption(inData As UByte Ptr, ByRef indIn As Integer) As Integer
	If Peek(Long,inData) <> &H09477A4C Then Return 0
	Dim As UByte Ptr inValP
	Dim As Integer outDataSize, k
	ReadVarInt(outDataSize)
	Return outDataSize
End Function

Function Decompress OverLoad (inSt As String) As String
	If Len(inSt) < 7 Then Return ""
	Dim As UByte Ptr inData = SAdd(inSt)
	Var indIn = 4
	Var outDataSize = ReadCaption(inData,indIn)
	If outDataSize = 0 Then Return ""
	Dim As UByte Ptr outData = Allocate(outDataSize+8)
	If outData = 0 Then Return ""
	If Decmpr(inData+indIn,Len(inSt)-indIn,outData) Then
		Dim As TypeStr sTp = Type(Cast(ZString Ptr,outData),outDataSize,outDataSize+1)
	  Function = *Cast(String Ptr, @sTp)
	EndIf
  DeAllocate outData
End Function

Function Decompress(inData As UByte Ptr,inSize As Integer,_
												ByRef outData As UByte Ptr) As Integer
	If inData = 0 Or inSize < 7 Then Return -1
	Var indIn = 4, outDataSize = ReadCaption(inData,indIn)
	If outDataSize = 0 Then Return 0
	outData = Allocate(outDataSize+8)
	If outData = 0 Then Return 0
	If Decmpr(inData+indIn,inSize-indIn,outData) Then Return outDataSize
End Function

End Namespace
neil
Posts: 594
Joined: Mar 17, 2022 23:26

Re: simple compressor LZg9

Post by neil »

Thanks for posting. Your file compressed to about 4k. I was researching data compressors and came across this site. Using the experimental data compressor PAQ9A your file compressed down to 2.4k. The source code is written in C++. https://mattmahoney.net/dc/
neil
Posts: 594
Joined: Mar 17, 2022 23:26

Re: simple compressor LZg9

Post by neil »

I just tested your compressor against this FreeBasic huffman compressor. The Huffman compressed to 12k. Yours compressed to 4k. You have a clear winner. Are you going to continue working on yours? I would use yours over the huffman one.

Code: Select all

' huffman compressor - decompressor For FreeBasic
' to compress huffman c myfile.txt myfile.huf
' to decompress huffman d myfile.huf myfile.txt

' updated by badidea for fbc 1.09.0 32/64-bit linux/windows

'----------------------------- simple binary tree ------------------------------
namespace bt

  type node_t
    index        as integer
    node_flags   as ubyte
    node(0 to 1) as node_t ptr
  end type
 
  const leaf_node = 0, branch_node = 1
 
  function create_leaf (index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    node_new->index = index
    node_new->node_flags = leaf_node
    return node_new
  end function
 
  function create_branch (index as integer = 0, byval node0 as node_t ptr = 0, byval node1 as node_t ptr = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    node_new->index = index
    node_new->node_flags = branch_node
    node_new->node(0) = node0
    node_new->node(1) = node1
    return node_new
  end function
 
  function destroy (byval node_given as node_t ptr) as ubyte
    if node_given->node_flags and branch_node then
      if node_given->node(0) then destroy(node_given->node(0))
      if node_given->node(1) then destroy(node_given->node(1))
    end if
    deallocate(node_given)
    return 0
  end function
 
  'size in bits?
  function calculate_size (byval node_given as node_t ptr) as uinteger
    dim as uinteger size
    if node_given->node_flags and branch_node then
      if node_given->node(0) then size += calculate_size(node_given->node(0))
      if node_given->node(1) then size += calculate_size(node_given->node(1))
    else
      size += 8 'leaf = byte value?
    end if
    size += 1 '1 bit per branch depth
    return size
  end function
 
  function save_tree (byval node_given as node_t ptr, bt_data as ubyte ptr, _
    byref bt_data_ptr as uinteger = 0, byref bt_data_bit_ptr as byte = 8) as ubyte
    '
    if bt_data_bit_ptr = 0 then
      bt_data_ptr += 1
      bt_data_bit_ptr = 8
    end if
    bt_data_bit_ptr -= 1
    if node_given->node_flags and branch_node then
      bt_data[bt_data_ptr] = bitreset(bt_data[bt_data_ptr], bt_data_bit_ptr)
      if node_given->node(0) then save_tree(node_given->node(0), bt_data, bt_data_ptr, bt_data_bit_ptr)
      if node_given->node(1) then save_tree(node_given->node(1), bt_data, bt_data_ptr, bt_data_bit_ptr)
    else 'leaf_node
      bt_data[bt_data_ptr] = bitset(bt_data[bt_data_ptr], bt_data_bit_ptr)
      dim as ubyte i
      for i = 0 to 7
        if bt_data_bit_ptr = 0 then
          bt_data_ptr += 1
          bt_data_bit_ptr = 8
        end if
        bt_data_bit_ptr -= 1
        if bit(node_given->index, 7 - i) then
          bt_data[bt_data_ptr] = bitset(bt_data[bt_data_ptr], bt_data_bit_ptr)
        else
          bt_data[bt_data_ptr] = bitreset(bt_data[bt_data_ptr], bt_data_bit_ptr)
        end if
      next
    end if
    return 0
  end function
 
  function load_tree (byval bt_data as ubyte ptr, byref bt_data_ptr as uinteger = 0, _
    byref bt_data_bit_ptr as byte = 8) as node_t ptr
    '
    dim as node_t ptr return_node
    if bt_data_bit_ptr = 0 then
      bt_data_ptr += 1
      bt_data_bit_ptr = 8
    end if
    bt_data_bit_ptr -= 1
    if bit(bt_data[bt_data_ptr], bt_data_bit_ptr) = 0 then
      return_node = create_branch(-1)
      return_node->node(0) = load_tree(bt_data, bt_data_ptr, bt_data_bit_ptr)
      return_node->node(1) = load_tree(bt_data, bt_data_ptr, bt_data_bit_ptr)
    else
      dim as uinteger index
      dim as ubyte i
      for i = 0 to 7
        if bt_data_bit_ptr = 0 then
          bt_data_ptr += 1
          bt_data_bit_ptr = 8
        end if
        bt_data_bit_ptr -= 1
        if bit(bt_data[bt_data_ptr], bt_data_bit_ptr) then
          index = bitset(index, 7 - i)
        else
          index = bitreset(index, 7 - i)
        end if
      next
      return_node = create_leaf(index)
    end if
    return return_node
  end function
 
  function dump (byval node_given as node_t ptr, indent as ubyte = 0) as ubyte
    if node_given->node_flags and branch_node then
      print string(indent, 32) & "branch: " & node_given->index
      if node_given->node(0) then dump(node_given->node(0), indent + 2)
      if node_given->node(1) then dump(node_given->node(1), indent + 2)
    else
      print string(indent, 32) & "leaf: " & node_given->index
    end if
    return 0
  end function
 
  function qtable (byval node_given as node_t ptr, path() as ulong, bits() as ubyte, byref cpath as ulong = 0, byref cbits as ubyte = 0) as ubyte
    if cbits > 31 then print "QTABLE FAILED!" : stop
    if node_given->node_flags and branch_node then
      if node_given->node(0) then qtable(node_given->node(0), path(), bits(), (cpath shl 1), cbits + 1)
      if node_given->node(1) then qtable(node_given->node(1), path(), bits(), (cpath shl 1) or 1, cbits + 1)
    else
      path(node_given->index) = cpath
      bits(node_given->index) = cbits
    end if
    return 0
  end function
 
end namespace

'----------------------------- simple linked list ------------------------------
namespace ll

  type node_t
    index      as integer
    prev_node  as node_t ptr
    next_node  as node_t ptr
    foo        as any ptr
  end type

  const as uinteger no_node = 0
 
  declare function create (index as integer = 0) as node_t ptr
  declare function destroy (byval node_given as node_t ptr) as node_t ptr
  declare function insert_after (byval node_given as node_t ptr, index as integer = 0) as node_t ptr
  declare function deleet (byval node_given as node_t ptr) as node_t ptr
  declare function nswap (byval node_given1 as node_t ptr, byval node_given2 as node_t ptr) as ubyte
  declare function seek_first (byval node_given as node_t ptr) as node_t ptr
  declare function sort_index (byval node_given as node_t ptr) as ubyte
  declare function count (byval node_given as node_t ptr) as uinteger
  declare function dump (byval node_given as node_t ptr) as ubyte
 
  function create (index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    node_new->index = index
    return node_new
  end function

  function destroy (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current = node_given
    node_current = seek_first(node_current)
    do until (node_current->next_node = no_node)
      node_current = node_current->next_node
      deallocate(node_current->prev_node)
    loop
    deallocate(node_current)
    return no_node
  end function

  function insert_after (byval node_given as node_t ptr, index as integer = 0) as node_t ptr
    dim as node_t ptr node_new = callocate(sizeof(node_t))
    if not(node_given->next_node = no_node) then
      node_new->next_node = node_given->next_node
      node_given->next_node->prev_node = node_new
    end if
    node_given->next_node = node_new
    node_new->prev_node = node_given
    node_new->index = index
    return node_new
  end function

  function deleet (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current
    if (node_given->prev_node = no_node) and (node_given->prev_node = no_node) then
      node_current = no_node
    elseif (node_given->prev_node = no_node) then
      node_current = node_given->next_node
      node_given->next_node->prev_node = no_node
    elseif (node_given->next_node = no_node) then
      node_current = node_given->prev_node
      node_given->prev_node->next_node = no_node
    else
      node_current = node_given->next_node
      node_given->prev_node->next_node = node_given->next_node
      node_given->next_node->prev_node = node_given->prev_node
    end if
    deallocate(node_given)
    return node_current
  end function

  function nswap (byval node_given1 as node_t ptr, byval node_given2 as node_t ptr) as ubyte
    swap node_given1->index, node_given2->index
    swap node_given1->foo, node_given2->foo
    return 0
  end function

  function seek_first (byval node_given as node_t ptr) as node_t ptr
    dim as node_t ptr node_current = node_given
    do until (node_current->prev_node = no_node)
      node_current = node_current->prev_node
    loop
    return node_current
  end function

  function sort_index (byval node_given as node_t ptr) as ubyte
    dim as node_t ptr node_current = node_given
    dim as node_t ptr node_grab, node_match
    node_current = seek_first(node_current)
    node_grab = node_current
    do until (node_grab->next_node = no_node)
      node_match = node_grab
      node_current = node_grab
      do until (node_current->next_node = no_node)
        node_current = node_current->next_node
        if (node_current->index < node_match->index) then node_match = node_current
      loop
      if (node_match <> node_grab) then nswap(node_match, node_grab)
      node_grab = node_grab->next_node
    loop
    return 0
  end function

  function count (byval node_given as node_t ptr) as uinteger
    dim as node_t ptr node_current = node_given
    dim as uinteger ncount = 1
    node_current = seek_first(node_current)
    do until (node_current->next_node = no_node)
      node_current = node_current->next_node
      ncount += 1
    loop
    return ncount
  end function

  function dump (byval node_given as node_t ptr) as ubyte
    dim as node_t ptr node_current = node_given
    node_current = seek_first(node_current)
    print "index", "foo", "previous", "next"
    do
      print hex(node_current->index, 8), hex(node_current->foo, 8),;
      if not(node_current->prev_node = no_node) then
        print hex(node_current->prev_node->index, 8), ';
      else
        print " -", ';
      end if
      if not(node_current->next_node = no_node) then
        print hex(node_current->next_node->index, 8)
      else
        print " -"
        exit do
      end if
      node_current = node_current->next_node
    loop
    return 0
  end function

end namespace

'-------------------------------- main program ---------------------------------

'File format (1-based):
' 1 ...  4 = "HUFF"
' 5 ...  8 = ts (tree size)
' 9 ... 12 = cs (compressed data size)
'13 ... 16 = ds (original data size)
'tree data
'compressed data

select case command(1)
'---------------------------------- compress -----------------------------------
case "c"
  dim as ubyte ptr ddata, cdata, tdata
  dim as ulong ds, cs, ts
  dim as uinteger di, ci 
  dim as ubyte cb = 8

  'original input file
  dim as long f = freefile
  open command(2) for binary access read as #f
  ds = lof(f) 'data size
  ddata = callocate(ds) 'raw data
  get #f, , *ddata, ds 'read all
  close #f
 
  print "  building tree..."
 
  dim as uinteger u(255)

  'histogram, count byte occurance
  for di = 0 to ds - 1
    u(ddata[di]) += 1
  next
 
  dim as ll.node_t ptr l

  'loop histogram, build linked list
  dim as uinteger i
  for i = 0 to 255
    if u(i) then '> 0?
      '~ print chr(i), u(i)
      if l then
        l = ll.insert_after(l, u(i))
      else
        l = ll.create(u(i))
      end if
      l->foo = bt.create_leaf(i)
    end if
  next

  '~ ll.dump(l)

  '~ for i as integer = 0 to 255
    '~ print u(i)
  '~ next
 
  do while ll.count(l) > 1
    ll.sort_index(l)
    l = ll.seek_first(l)
   
    l->foo = bt.create_branch(-1, l->foo, l->next_node->foo)
    l->index += l->next_node->index
    ll.deleet(l->next_node)
  loop

  dim as bt.node_t ptr b

  b = l->foo
  ll.destroy(l)
 
  ts = (bt.calculate_size(b) + 7) \ 8
  '~ print "bt.calculate_size(b): " & bt.calculate_size(b) & " bits"
  tdata = callocate(ts)

  bt.save_tree(b, tdata)

  '~ bt.dump(b)

  '~ for i as integer = 0 to ts-1
    '~ print bin(tdata[i])
  '~ next

  'write hufmann output file
  dim as long o = freefile
  open command(3) for binary access write as #o
  put #o, , "HUFF"
  put #o, , ts 'tree size
  '~ print "ts: " & ts
  seek #o, 13
  put #o, , ds
  '~ print "ds: " & ds
  put #o, , *tdata, ts 'tree data
 
  dim as ulong qtp(255)
  dim as ubyte qtb(255)
  bt.qtable(b, qtp(), qtb())
 
  print "  compressing..." ';
 
  cdata = callocate(ds)
  for di = 0 to ds - 1
    if qtb(ddata[di]) then
      for i = 0 to qtb(ddata[di]) - 1
        if cb = 0 then
          ci += 1
          cb = 8
        end if
        cb -= 1
        cdata[ci] shl= 1
        cdata[ci] or= abs(bit(qtp(ddata[di]), qtb(ddata[di]) - i - 1))
      next
    end if
    if di mod 1024 = 0 then locate , 80 - 26 : print " " & string(fix(di / ds * 20), ".") & string(20 - fix(di / ds * 20), "o") & " " & fix(di / ds * 100) & "%" ';
  next
 
  cdata[ci] shl= cb

  cs = ci + 1
  put #o, , *cdata, cs
  put #o, 9, cs
  '~ print "cs: " & cs
 
  bt.destroy(b)
 
  deallocate(ddata)
  deallocate(cdata)

  close #o 'added
  
'--------------------------------- decompress ----------------------------------
case "d"
  dim as ubyte ptr tdata, cdata, ddata
  dim as uinteger ci, di
  dim as ulong ts, cs, ds
  dim as ubyte cb = 8
  dim as string header = string(4, 0)
 
  dim as long f = freefile
  open command(2) for binary access read as #f

  get #f, , header
  if header <> "HUFF" then
    close #f
    print "invalid source file (HUFF expected)"
    end
  end if
  seek #f, 5
  get #f, , ts
  '~ print "ts: " & ts
  tdata = callocate(ts)
  get #f, , cs
  '~ print "cs: " & cs
  cdata = callocate(cs)
  get #f, , ds
  '~ print "ds: " & ds
  ddata = callocate(ds)
  get #f, , *tdata, ts
  get #f, , *cdata, cs
 
  close #f
 
  print "  reading tree..."
 
  dim as bt.node_t ptr tr = bt.load_tree(tdata), tc
  deallocate(tdata)
 
  print "  decompressing..." ';
 
  do while di < ds
    tc = tr
    do while tc->node_flags and bt.branch_node
      if cb = 0 then
        ci += 1
        cb = 8
      end if
      cb -= 1
      tc = tc->node(abs(bit(cdata[ci], cb)))
    loop
    ddata[di] = tc->index
    di += 1
    if di mod 1024 = 0 then locate , 80 - 26 : print " " & string(fix(di / ds * 20), ".") & string(20 - fix(di / ds * 20), "o") & " " & fix(di / ds * 100) & "%" ';
  loop
 
  dim as long o = freefile
  open command(3) for binary access write as #o
 
  put #o, , *ddata, ds
 
  close #o
 
  bt.destroy(tr)
 
  deallocate(cdata)
  deallocate(ddata)

'------------------------------ test, show tree --------------------------------
case "t"
  dim as ubyte ptr tdata
  dim as ulong ts, cs, ds
 
  dim as long f = freefile
  open command(2) for binary access read as #f
 
  dim as string header = string(4, 0)
  get #f, , header
  if header <> "HUFF" then
    close #f
    print "Invalid source file (HUFF expected)"
    end
  end if
  seek #f, 5
  get #f, , ts
  tdata = callocate(ts)
  get #f, , cs
  get #f, , ds
  get #f, , *tdata, ts
 
  close #f

  '~ for i as integer = 0 to ts-1
    '~ print tdata[i]
  '~ next
  dim as bt.node_t ptr tr = bt.load_tree(tdata), tc
  deallocate(tdata)
 
  bt.dump(tr)
 
  bt.destroy(tr)
 
case else
  print command(0) & " c|d src dst"
  print "c = compress, d = decompress"
 
end select
Vitamin
Posts: 3
Joined: Nov 04, 2023 18:33

Re: simple compressor LZg9

Post by Vitamin »

Are you going to continue working on yours?
The work has come to an end, so I provided the result.
neil
Posts: 594
Joined: Mar 17, 2022 23:26

Re: simple compressor LZg9

Post by neil »

I just thought of another method to compress ABRACADABRA 11 Bytes to 4 Bytes.

Code: Select all

' compress 11 bytes to 4 bytes by neil

Dim AS UByte a,b,c,d,i,cnt,n
Dim AS String s1,s2,s3

' ABRACADABRA Compressed 11 Bytes to 4 Bytes
a = 125:b = 131:c = 41:d = 251

Print "Compressed ABRACADABRA to 4 Bytes 125 131 41 251"
Print

' Decompress
Print "Decompressed 125 131 41 251 to 11 Bytes ";
for n = 1 to 4
cnt = 3
If n = 1 Then s1 = str(a)
If n = 2 Then s1 = str(b)
If n = 3 Then s1 = str(c):cnt = 2
If n = 4 Then s1 = str(d)

for i = 1 to cnt
s2 = mid(s1,i,1)
if s2 = "1" Then s3 = "A"
if s2 = "2" Then s3 = "B"
if s2 = "3" Then s3 = "C"
if s2 = "4" Then s3 = "D"
if s2 = "5" Then s3 = "R"
print s3;
next
next
Sleep
neil
Posts: 594
Joined: Mar 17, 2022 23:26

Re: simple compressor LZg9

Post by neil »

For this method to work, you have to store an extra five letters in a file, so the compression would go from 11 bytes to 9 bytes, which is not so good. I am abandoning this idea.
neil
Posts: 594
Joined: Mar 17, 2022 23:26

Re: simple compressor LZg9

Post by neil »

Here's another idea this one works. This will always compress any 8 capital letters down to 5 bytes. No patterns or redundancy are required.

Code: Select all

'8 bytes to 5 bytes by neil

Dim As UByte i,n,a,b,c,d,cb,cnt
Dim AS String s1,s2,s3,s4,s5,s6,sa,sb

'8 capital letters or word to compress
sa = "RESEARCH"

s1 = " !@-#.ABCDEFGHIJKLMNOPQRSTUVWXYZ"
s2 = "0123456789ABCDEF0123456789ABCDEF"
n = 1
print
print "String to Compress "
print sa
print
print "5 Compressed Bytes"

for i = 1 to 8

s3 = mid(sa,i,1)
Do
s4 = mid(s1,n,1)
s5 = mid(s2,n,1)
n += 1
Loop until s4 = s3
n = 1 

'control byte and bits needed for referencing lookup table
if i = 1 and asc(s4) > 74 Then cb += 128
if i = 2 and asc(s4) > 74 Then cb += 64
if i = 3 and asc(s4) > 74 Then cb += 32
if i = 4 and asc(s4) > 74 Then cb += 16
if i = 5 and asc(s4) > 74 Then cb += 8
if i = 6 and asc(s4) > 74 Then cb += 4
if i = 7 and asc(s4) > 74 Then cb += 2
if i = 8 and asc(s4) > 74 Then cb += 1 

cnt += 1
sb = sb + s5
if cnt = 2 Then a = val("&H" + sb):print hex(a,2);" ";:sb = ""
if cnt = 4 Then b = val("&H" + sb):print hex(b,2);" ";:sb = ""
if cnt = 6 Then c = val("&H" + sb):print hex(c,2);" ";:sb = ""
if cnt = 8 Then d = val("&H" + sb):print hex(d,2);" ";:sb = ""
next

Print Hex(cb,2);" "
Print
Print "8 Decompressed Bytes"

If (cb and 128) = 0 THEN cnt = 1
If (cb and 128) THEN cnt = 17

s3 = hex(a,2)
s4 = left(s3,1)

for i = cnt to 32
s5 = mid(s2,i,1)
s6 = mid(s1,i,1)

if s4 = s5 Then Exit For
next
print s6;

If (cb and 64) = 0 THEN cnt = 1
If (cb and 64) THEN cnt = 17
s3 = hex(a,2)
s4 = right(s3,1)

for i = cnt to 32
s5 = mid(s2,i,1)
s6 = mid(s1,i,1)

if s4 = s5 Then Exit For
next
print s6;

If (cb and 32) = 0 THEN cnt = 1
If (cb and 32) THEN cnt = 17

s3 = hex(b,2)
s4 = left(s3,1)

for i = cnt to 32
s5 = mid(s2,i,1)
s6 = mid(s1,i,1)

if s4 = s5 Then Exit For
next
print s6;

If (cb and 16) = 0 THEN cnt = 1
If (cb and 16) THEN cnt = 17

s3 = hex(b,2)
s4 = right(s3,1)

for i = cnt to 32
s5 = mid(s2,i,1)
s6 = mid(s1,i,1)
if s4 = s5 Then Exit For
next
print s6;

If (cb and 8) = 0 THEN cnt = 1
If (cb and 8) THEN cnt = 17

s3 = hex(c,2)
s4 = left(s3,1)

for i = cnt to 32
s5 = mid(s2,i,1)
s6 = mid(s1,i,1)

if s4 = s5 Then Exit For
next
print s6;

If (cb and 4) = 0 THEN cnt = 1
If (cb and 4) THEN cnt = 17

s3 = hex(c,2)
s4 = right(s3,1)

for i = cnt to 32
s5 = mid(s2,i,1)
s6 = mid(s1,i,1)
if s4 = s5 Then Exit For
next
print s6;

If (cb and 2) = 0 THEN cnt = 1
If (cb and 2) THEN cnt = 17

s3 = hex(d,2)
s4 = left(s3,1)

for i = cnt to 32
s5 = mid(s2,i,1)
s6 = mid(s1,i,1)

if s4 = s5 Then Exit For
next
print s6;

If (cb and 1) = 0 THEN cnt = 1
If (cb and 1) THEN cnt = 17

s3 = hex(d,2)
s4 = right(s3,1)

for i = cnt to 32
s5 = mid(s2,i,1)
s6 = mid(s1,i,1)
if s4 = s5 Then Exit For
next
print s6
print
sleep
Last edited by neil on Nov 19, 2023 22:58, edited 1 time in total.
paul doe
Moderator
Posts: 1736
Joined: Jul 25, 2017 17:22
Location: Argentina

Re: simple compressor LZg9

Post by paul doe »

neil wrote: Nov 07, 2023 21:01 Here's another idea this one works. This will always compress any 8 capital letters down to 5 bytes. No patterns or redundancy are required.
Consider opening your own thread if you're gonna try other methods, so as to not pollute this one, yes?
neil
Posts: 594
Joined: Mar 17, 2022 23:26

Re: simple compressor LZg9

Post by neil »

@paul doe. I agree please delete my previous posts.
paul doe
Moderator
Posts: 1736
Joined: Jul 25, 2017 17:22
Location: Argentina

Re: simple compressor LZg9

Post by paul doe »

No need. Just open a new thread if you want to discuss a different approach :idea:
Post Reply