Jump to content

Sky Slate Blueberry Blackcurrant Watermelon Strawberry Orange Banana Apple Emerald Chocolate
Photo

VarZ - NT Native Data Compression


  • Please log in to reply
20 replies to this topic
SKAN
  • Administrators
  • 9115 posts
  • Last active:
  • Joined: 26 Dec 2005
    gAHK32    
Generic AutoHotkey 32bit

Posted Image 27-Sep-2012



Usage example:

Data =
(
The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox
The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox
The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox
The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox
The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox The Quick Brown Fox
)

; Size of the text in bytes: Unicode builds store two bytes per character.
DataSize := StrLen( Data ) * ( A_IsUnicode ? 2 : 1 )

; VarZ_Compress() returns 0 and sets ErrorLevel when the data could not be compressed,
; in which case there is nothing meaningful to save.
CompressedDataSize := VarZ_Compress( Data, DataSize )
If ( CompressedDataSize = 0 ) {
   MsgBox % "Compression failed. ErrorLevel: " ErrorLevel
   Return
}

; Round trip: write the compressed bytes to disk, read them back and restore the text.
VarZ_Save( Data, CompressedDataSize, "tqbf.txt.lz_" )
VarZ_Load( Data, "tqbf.txt.lz_" )
VarZ_Decompress( Data )

MsgBox % "Original size:`t" DataSize "`n"
       . "Compressed:`t"    CompressedDataSize "`n`n"
       . "[" Data "]"


Wrapper

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
__      __      ______
\ \    / /     |___  /           V A R Z  >>>  N A T I V E  D A T A  C O M P R E S S I O N
 \ \  / /_ _ _ __ / /            http://www.autohotkey.com/community/viewtopic.php?t=45559
  \ \/ / _` | '__/ /             Author: Suresh Kumar A N  (email: [email protected])
   \  / (_| | | / /__            Ver 2.0 | Created 19-Jun-2009 | Last Modified 27-Sep-2012
    \/ \__,_|_|/_____|           > http://tinyurl.com/skanbox/AutoHotkey/VarZ/2.0/VarZ.ahk
                                                  |
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/

; VarZ_Compress() - compresses the bytes held in a variable, in place, through the NT
;                   native ntdll\RtlCompressBuffer API.
;
;   Data             ByRef variable holding the raw bytes. On success its contents are
;                    replaced by an 18-byte header followed by the compressed stream.
;   DataSize         Number of bytes of Data to compress.
;   CompressionMode  Format/engine flags handed to RtlCompressBuffer. The default 0x102
;                    combines format 0x2 with COMPRESSION_ENGINE_MAXIMUM ( 0x100 ).
;
;   Returns the total size in bytes ( header + compressed data ), or 0 on failure with
;   ErrorLevel set to:
;     -1        Data already starts with the "LZ_" signature ( already compressed )
;     -2        compression worked but header + result would be larger than the source
;     NTSTATUS  any non-zero status reported by ntdll
;
;   Header layout ( byte offsets ):
;      0  "LZ_" + Chr(0) signature            ( UInt   )
;      4  hash of everything from offset 8    ( UInt   )
;      8  compression mode                    ( UShort )
;     10  uncompressed data size              ( UInt   )
;     14  compressed data size                ( UInt   )

VarZ_Compress( ByRef Data, DataSize, CompressionMode = 0x102 ) {

 Static STATUS_SUCCESS := 0x0,   HdrSz := 18

 If ( NumGet( Data ) = 0x005F5A4C )                           ; "LZ_" + Chr(0)
    Return 0, ErrorLevel := -1                                ; already compressed

 NTSTATUS := DllCall( "ntdll\RtlGetCompressionWorkSpaceSize"
                    , UInt,  CompressionMode
                    , UIntP, CompressBufferWorkSpaceSize
                    , UIntP, CompressFragmentWorkSpaceSize
                          ,  UInt )

 If ( NTSTATUS <> STATUS_SUCCESS )                            ; e.g. unsupported mode:
    Return 0, ErrorLevel := NTSTATUS                          ; workspace size is unknown

 VarSetCapacity( CompressBufferWorkSpace, CompressBufferWorkSpaceSize )

 TempSize := VarSetCapacity( TempData, DataSize )             ; Workspace for Compress

 NTSTATUS := DllCall( "ntdll\RtlCompressBuffer"
                    , UInt,  CompressionMode
                    , UInt,  &Data                            ; Uncompressed data
                    , UInt,  DataSize
                    , UInt,  &TempData                        ; Compressed data
                    , UInt,  TempSize
                    , UInt,  CompressFragmentWorkSpaceSize
                    , UIntP, FinalCompressedSize              ; Compressed data size
                    , UInt,  &CompressBufferWorkSpace
                          ,  UInt )

 If ( NTSTATUS <> STATUS_SUCCESS  ||  FinalCompressedSize + HdrSz > DataSize )
    Return 0, ErrorLevel := ( NTSTATUS ? NTSTATUS : -2 )      ; unable to compress data

 VarSetCapacity( Data, FinalCompressedSize + HdrSz, 0 )       ; Renew variable capacity

 NumPut( 0x005F5A4C, Data, 0, "UInt" )                        ; "LZ_" + Chr(0)
 NumPut( CompressionMode, Data, 8, "UShort" )                 ; Compression mode
 NumPut( DataSize, Data, 10, "UInt" )                         ; Uncompressed data size
 NumPut( FinalCompressedSize, Data, 14, "UInt" )              ; Compressed data size

 DllCall( "RtlMoveMemory", UInt,  &Data + HdrSz               ; Target pointer
                         , UInt,  &TempData                   ; Source pointer
                         , UInt,  FinalCompressedSize )       ; Data length in bytes

 ; Hash covers mode + both sizes ( 10 bytes ) + the compressed stream; stored at offset 4
 DllCall( "shlwapi\HashData", UInt,  &Data + 8                ; Read data pointer
                            , UInt,  FinalCompressedSize + 10 ; Read data size
                            , UInt,  &Data + 4                ; Write data pointer
                            , UInt,  4 )                      ; Write data length in bytes

Return FinalCompressedSize + HdrSz
}

;- -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - --

; VarZ_Decompress() - restores, in place, a variable previously packed by VarZ_Compress().
;
;   Data   ByRef variable holding the 18-byte VarZ header followed by the compressed
;          stream. On success its contents are replaced by the decompressed bytes.
;
;   Returns the decompressed size in bytes, or 0 on failure with ErrorLevel set to:
;     -1        Data does not start with the "LZ_" signature ( not natively compressed )
;     -2        header is inconsistent with the variable's capacity, or the stored hash
;               does not match ( data corrupt )
;     NTSTATUS  any non-zero status reported by ntdll\RtlDecompressBuffer

VarZ_Decompress( ByRef Data ) {

 Static STATUS_SUCCESS := 0x0,   HdrSz := 18

 If ( NumGet( Data ) <> 0x005F5A4C )                          ; "LZ_" + Chr(0)
    Return 0, ErrorLevel := -1                                ; not natively compressed

 DataSize := NumGet( Data, 14 )                               ; Compressed data size

 ; The size comes from the ( possibly truncated or damaged ) data itself, so make sure
 ; header + stream really fit inside the variable before any API reads that many bytes.
 If ( DataSize = ""  ||  VarSetCapacity( Data ) < DataSize + HdrSz )
    Return 0, ErrorLevel := -2                                ; Header invalid = Data corrupt

 DllCall( "shlwapi\HashData", UInt,  &Data + 8                ; Read data pointer
                            , UInt,  DataSize + 10            ; Read data size
                            , UIntP, Hash                     ; Write data pointer
                            , UInt,  4 )                      ; Write data length in bytes

 If ( Hash <> NumGet( Data, 4 ) )                             ; Hash vs Saved hash
    Return 0, ErrorLevel := -2                                ; Hash failed = Data corrupt

 TempSize := NumGet( Data, 10 )                               ; Decompressed data size
 VarSetCapacity( TempData, TempSize, 0 )                      ; Workspace for Decompress

 NTSTATUS := DllCall( "ntdll\RtlDecompressBuffer"
                    , UInt,  NumGet( Data, 8, "UShort" )      ; Compression mode
                    , UInt,  &TempData                        ; Decompressed data
                    , UInt,  TempSize
                    , UInt,  &Data + HdrSz                    ; Compressed data
                    , UInt,  DataSize
                    , UIntP, FinalUncompressedSize            ; Decompressed data size
                           , UInt )

 If ( NTSTATUS <> STATUS_SUCCESS )
    Return 0, ErrorLevel := NTSTATUS                          ; Unable to decompress data

 VarSetCapacity( Data, FinalUncompressedSize, 0 )             ; Renew variable capacity

 DllCall( "RtlMoveMemory", UInt,  &Data                       ; Target pointer
                         , UInt,  &TempData                   ; Source pointer
                         , UInt,  FinalUncompressedSize )     ; Data length in bytes

; VarSetCapacity( Data, -1 ) makes AutoHotkey re-read the string length of the new contents
Return FinalUncompressedSize, VarSetCapacity( Data, -1 )
}

;- -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - --

; VarZ_Load() - reads a whole file into a variable as binary data.
;
;   Data     ByRef variable that receives the file contents ( FileRead *c, binary mode ).
;   SrcFile  Path of the file to read.
;
;   Returns the file size in bytes. If the size cannot be determined or the file cannot
;   be read, returns blank and leaves ErrorLevel as set by the failing command.

VarZ_Load( ByRef Data, SrcFile ) {
 FileGetSize, DataSize, %SrcFile%
 If ( ErrorLevel )                                            ; file missing / inaccessible
    Return
 FileRead, Data, *c %SrcFile%
 If ( ErrorLevel )                                            ; do not report a size for
    Return                                                    ; data that was never loaded
Return DataSize
}

;- -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - --

; VarZ_Save() - writes the first DataSize bytes of a variable to a file, replacing any
;               existing file of that name.
;
;   Data      ByRef variable holding the bytes to write.
;   DataSize  Number of bytes to write.
;   TrgFile   Path of the file to create.
;
;   Returns the number of bytes written. If the file cannot be created or the write
;   fails, returns blank with ErrorLevel set to 1.

VarZ_Save( ByRef Data, DataSize, TrgFile ) {
 Static HFILE_ERROR := 0xFFFFFFFF                             ; _lwrite failure, as UInt
 ; _lcreat takes an ANSI path, so Unicode builds must convert the string ( AStr )
 hFile := DllCall( "_lcreat", ( A_IsUnicode ? "AStr" : "Str" ),TrgFile, UInt,0 )
 If ( hFile < 1 )
    Return "", ErrorLevel := 1
 nBytes := DllCall( "_lwrite", UInt,hFile, UInt,&Data, UInt,DataSize, UInt )
 DllCall( "_lclose", UInt,hFile )                             ; always release the handle
 If ( nBytes = HFILE_ERROR )                                  ; checked after the close,
    Return "", ErrorLevel := 1                                ; which resets ErrorLevel
Return nBytes
}

;- -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- End of VarZ wrapper

Extra

VarZ_Uncompress( ByRef D ) {  ; Shortcode version of VarZ_Decompress() of VarZ 2.0 wrapper
; VarZ 2.0 by SKAN, 27-Sep-2012. http://www.autohotkey.com/community/viewtopic.php?t=45559
; Returns the decompressed size, or 0 with ErrorLevel -1 ( no "LZ_" signature ),
; -2 ( header / hash mismatch = corrupt data ) or the NTSTATUS of RtlDecompressBuffer.
 If ( NumGet(D) <> 0x5F5A4C )                                 ; "LZ_" + Chr(0) signature
    Return 0, ErrorLevel := -1
 savedHash := NumGet(D,4), TZ := NumGet(D,10), DZ := NumGet(D,14)
 If ( DZ = "" || VarSetCapacity(D) < DZ+18 )                  ; header claims more bytes
    Return 0, ErrorLevel := -2                                ; than the variable holds
 DllCall( "shlwapi\HashData", UInt,&D+8, UInt,DZ+10, UIntP,Hash, UInt,4 )
 If ( Hash <> savedHash )
    Return 0, ErrorLevel := -2
 VarSetCapacity( TD,TZ,0 ), NTSTATUS := DllCall( "ntdll\RtlDecompressBuffer", UInt
 , NumGet(D,8,"UShort"), UInt, &TD, UInt,TZ, UInt,&D+18, UInt,DZ, UIntP,Final, UInt )
 If ( NTSTATUS <> 0 )
    Return 0, ErrorLevel := NTSTATUS
 VarSetCapacity( D,Final,0 ), DllCall( "RtlMoveMemory", UInt,&D, UInt,&TD, UInt,Final )
Return Final, VarSetCapacity( D,-1 )
}

Notes:

  • When VarZ_Compress() fails, AHK ErrorLevel is set to either of the two most probable NTSTATUS values:

      • 0x00000117 - STATUS_BUFFER_ALL_ZEROS
        The specified buffer contains all zeros.
      • 0xC0000023 - STATUS_BUFFER_TOO_SMALL
        The buffer is too small to contain the entry. No information has been written to the buffer.

    For simple understanding, the former error occurs when you pass an empty variable and the latter error occurs when the compressed data is larger than the source. RtlCompressBuffer() will actually bloat the data when you try to:

      • Compress a short string
      • Compress a string that has already been compressed with a better compression technology

  • By default COMPRESSION_ENGINE_MAXIMUM ( 0x100 ) is enabled and is 5 times slower than the normal compression. To turn it off, pass 0x2 as the third parameter,
    like: VarZ_Compress( Data, DataSize, 0x2 )

  • Compressed data will be prefixed with an info header sized 18 bytes. If compression was successful but Compressed Data + Header exceeds the size of the Uncompressed Data, then VarZ_Compress() will ignore the compression and return 0.

  • MSDN Reference: RtlCompressBuffer, RtlDecompressBuffer, RtlGetCompressionWorkSpaceSize


  • Discussion on 'double compression' and an alternate version supporting double compression by HotKeyIt
  • I have also written a small Win32 console utility, LZ_.exe v1.1 - 64KiB ( Source code ), which can toggle Compression & Decompression for a given file.
kWo4Lk1.png

HotKeyIt
  • Moderators
  • 7439 posts
  • Last active: Jun 22 2016 09:14 PM
  • Joined: 18 Jun 2008
:shock: That's awesome and will be very useful.
Thank you very much :D

daonlyfreez
  • Members
  • 995 posts
  • Last active: Jan 23 2013 08:16 AM
  • Joined: 16 Mar 2005
Very nice! 8)

I added your MD5 code to RosettaCode btw :wink:

I couldn't open the link to the "original", maybe someone else deserves credit too?
Posted Image mirror 1mirror 2mirror 3ahk4.me • PM or Posted Image

SKAN
  • Administrators
  • 9115 posts
  • Last active:
  • Joined: 26 Dec 2005

I couldn't open the link to the "original"


Sorry! Updated it : <!-- m -->http://www.autohotke... ... 910#275910<!-- m -->

BTW, please correct the rosetta example as follows:

data := "abc"
; Pass the real string length explicitly instead of relying on the variable's capacity
MsgBox % MD5(data,StrLen(data)) ; 900150983cd24fb0d6963f7d28e17f72

When we use the assignment operator ( := ) to initialise a variable, the capacity of the variable as returned by VarSetCapacity() will be only one of these values: 3, 7, 63, 256, 257, 258 or 259, until the string length reaches 259!
Proof:
; Lists, for string lengths 1..4096, the capacity AutoHotkey reports for a variable
; that was filled with the := operator.
SetBatchLines -1
Gui, Add, ListView, w205 r20 +Grid, String Length|Var Capacity
LV_ModifyCol( 1, "90 Integer" ), LV_ModifyCol( 2, "90 Integer" )
; VarSetCapacity() fills String with 4096 spaces and its return value drives the loop count
Loop % VarSetCapacity( String, 4096, 32 )
 Var := SubStr( String,1,A_Index ), LV_Add( "", StrLen(Var), VarSetCapacity(Var) )
Gui, Show,, StrLen Vs VarSetCapacity
Return

Whereas, when a variable is initialised by loading data with the FileRead command, the capacity of the variable will match the length of the file ( if the file size is > 2 bytes ).
Proof:
; Compares the capacity of a variable filled by FileRead with one filled by :=
FileDelete, test.txt
FileAppend, SKAN, test.txt
; Data1 receives the 4-character file contents; Data2 the same text via assignment
FileRead, Data1, test.txt
Data2 := "SKAN"
; Shows each string next to the capacity VarSetCapacity() reports for its variable
MsgBox, % Data1 "=" VarSetCapacity( Data1 ) "`n" Data2 "=" VarSetCapacity( Data2 )

Please excuse me if you are already aware of the above said. I chose to post this anyway as these facts heavily affect VarZ wrapper.

Thanks.

n-l-i-d
  • Guests
  • Last active:
  • Joined: --
Code and link corrected.

No, I was not aware of that. Thanks!

SKAN
  • Administrators
  • 9115 posts
  • Last active:
  • Joined: 26 Dec 2005
Have included a small function in VarZ wrapper to overcome the VarSetCapacity effect.

MsgBox, % VarZ_SetVar( Var, "AutoHotkey" )

; VarZ_SetVar() - stores string S in variable V so that the capacity of V matches the
;                 size of the string, and returns that capacity.
;
;   V   ByRef variable to ( re )initialise.
;   S   String to copy into V. Default: empty.
;
;   Returns the capacity reported by VarSetCapacity() after resizing V to the byte size
;   of S ( two bytes per character on Unicode builds ).

VarZ_SetVar( byRef V, S="" ) {
 N := StrLen( S ) * ( A_IsUnicode ? 2 : 1 )                   ; string size in bytes
 ; VarSetCapacity() never shrinks a variable unless it is freed first, and a capacity
 ; below 64 cannot be freed; growing to 64 and then freeing makes the resize below exact.
 VarSetCapacity( V, 64 ), VarSetCapacity( V, 0 )
 Capacity := VarSetCapacity( V, N, 0 )
 DllCall( "RtlMoveMemory", Str,V, Str,S, UInt,N )
Return Capacity
}


Lexikos
  • Administrators
  • 9844 posts
  • AutoHotkey Foundation
  • Last active:
  • Joined: 17 Oct 2006
If the goal of VarZ_SetVar is to ensure VarSetCapacity() will return the length of the string, you may improve its reliability by prepending this:
VarSetCapacity(V,64), VarSetCapacity(V,0)

; or

if StrLen(S) < VarSetCapacity(V) ; Extra steps are necessary to shrink variable.
{
    if VarSetCapacity(V) < 64    ; Cannot be freed.
        VarSetCapacity(V, 64)    ; Discard current memory and allocate dynamic memory.
    VarSetCapacity(V, 0)         ; Free dynamic memory in order to shrink variable.
}

Since this function is often called simply to ensure the variable has a certain minimum capacity, for performance reasons, it shrinks the variable only when RequestedCapacity is 0. In other words, if the variable's capacity is already greater than RequestedCapacity, it will not be reduced (but the variable will still made blank for consistency).

Therefore, to explicitly shrink a variable, first free its memory with VarSetCapacity(Var, 0) and then use VarSetCapacity(Var, NewCapacity) -- or simply let it auto-expand from zero as needed.

For performance reasons, freeing a variable whose previous capacity was between 1 and 63 might have no effect because its memory is of a permanent type. In this case, the current capacity will be returned rather than 0.

Source: AutoHotkey Documentation: VarSetCapacity()


It's only strictly necessary to do VarSetCapacity(V,64), VarSetCapacity(V,0) once on any given variable since doing so prevents V from receiving permanent memory in future. However, this is undocumented and difficult to detect.

SKAN
  • Administrators
  • 9115 posts
  • Last active:
  • Joined: 26 Dec 2005
Thank you very much for the valuable pointers Lexikos! :)

VarZ_Compress() did work properly with short strings and now I understand why. I have updated the wrapper.

Thanks again. :)

grimhana
  • Guests
  • Last active:
  • Joined: --
; Test compressing a small string 
oSize := VarZ_SetVar( Data, "123456789 123456789 123456789 123" )
cSize := VarZ_Compress( Data )
if cSize>0
    VarZ_decompress( Data )
  else 
     Data:=""
MsgBox, % "Original Size:`t" oSize "`nCompressed Size:`t" cSize  "`nDeCompressed:`t" Data 

---result---
---------------------------
Original Size: 33
Compressed Size: 0
DeCompressed:
---------------------------


minimum size limitation? :roll:

Guest_
  • Guests
  • Last active:
  • Joined: --
; Older one-expression VarZ_Save(): creates File, writes VarSetCapacity(V) bytes of V to
; it and closes the handle. The byte count comes from the variable's capacity, not from
; the length of the data it holds. Returns the _lwrite result, or 0 if the file could
; not be created; the "<< 64" term only exists to run _lclose inside the expression.
VarZ_Save( byRef V, File="" ) { ;   www.autohotkey.net/~Skan/wrapper/FileIO16/FileIO16.ahk 
Return ( ( hFile :=  DllCall( "_lcreat", Str,File, UInt,0 ) ) > 0 ) 
                 ?   DllCall( "_lwrite", UInt,hFile, Str,V, UInt,VarSetCapacity(V) ) 
                 + ( DllCall( "_lclose", UInt,hFile ) << 64 ) : 0 
} 

VarZ_Save's return value is the desired size + 1.
For example, UrlDownloadToFile 100kb but VarZ_Save 101kb.
Maybe VarSetCapacity(V) Seems to result in plus 1 size?
I wanna use your code because of compact size.
Is there any difference?

SKAN
  • Administrators
  • 9115 posts
  • Last active:
  • Joined: 26 Dec 2005
You need to call VarZ_SetVar() with the correct var size before using VarZ_Save()

Other option is to use the following modifed version:

Var := "The Quick Brown Fox"
VarZ_Save2( Var, 19, "TQBF.txt" )                             ; 19 = exact data size

; VarZ_Save2() - writes the first Sz bytes of V to File, replacing any existing file.
;   Unlike the older VarZ_Save( V, File ), the caller states the exact byte count, so the
;   result does not depend on the variable's capacity.
;   Returns the value returned by _lwrite ( bytes written ), or 0 if the file could not
;   be created.
VarZ_Save2( byRef V, Sz, File="" ) { ;   www.autohotkey.net/~Skan/wrapper/FileIO16/FileIO16.ahk
 ; _lcreat takes an ANSI path, so Unicode builds must convert the string ( AStr )
 hFile := DllCall( "_lcreat", ( A_IsUnicode ? "AStr" : "Str" ),File, UInt,0 )
 If ( hFile <= 0 )
    Return 0
 ; The address is passed instead of Str so the variable itself is left untouched
 nBytes := DllCall( "_lwrite", UInt,hFile, UInt,&V, UInt,Sz )
 DllCall( "_lclose", UInt,hFile )
Return nBytes
}


HotKeyIt
  • Moderators
  • 7439 posts
  • Last active: Jun 22 2016 09:14 PM
  • Joined: 18 Jun 2008
I am not sure if this might be related to that, in VarZ_Compress in Unicode version I faced a problem and had to add following, because otherwise VarSetCapacity did not use correct value.
VarZ_Compress( byref V, Max=1 ) {

 If ( NumGet(V)=0x5F5A4C && ( (M:=NumGet(V,12,"UShort"))=0x102||M=0x2) )

   Return -2                                                    ;    already LZ compressed

 DllCall( "ntdll\RtlGetCompressionWorkSpaceSize", UInt,M:=(!!Max<<8)+2,UIntP,WZ,UIntP,CZ )

 VarSetCapacity(WS,WZ), VZ := VarSetCapacity(V),  TZ := VarSetCapacity(TV,VZ)

 If ( NTSTATUS := DllCall( "ntdll\RtlCompressBuffer", UInt,M, Str,V, UInt,VZ, Str,TV, UInt

                                       ,TZ, UInt,CZ,UIntP,F ,UInt,&WS, UInt ))  || F+18>VZ

   Return (errorLevel:=NTSTATUS) ? -1 : 0                       ;       unable to compress

[color=red] If Mod(F,2)

  F++[/color]

 VarSetCapacity(V) < 64 ? VarSetCapacity(V,64) + VarSetCapacity(V,0) : VarSetCapacity(V,0)

;...



SKAN
  • Administrators
  • 9115 posts
  • Last active:
  • Joined: 26 Dec 2005
@HotkeyIt
Congratz on reaching 3k :D

Thanks for posting the tip. I owe you an explanation on why I have been reluctant in updating the code though you already alerted me through a PM.

Though I have not mentioned it explicitly, the VarZ wrapper is a direct add-on for AxC² - AutoHotkey Cabinet v1.00, which is 90% incompatible with the Unicode version of AHK. I thought I should upgrade this after I convert AXC², but AXC² became deprecated the moment I posted Resource-Only DLL for Dummies - 36L, which again is incompatible with the Unicode version.

I find it pretty hard to write code that can handle ANSI strings in both versions of AHK. Remember my dumb question? : How to FileRead binary in AHK_LW ?

I need to get a hang of AHK_L before I can support both. Might take a while.

Regards.

HotKeyIt
  • Moderators
  • 7439 posts
  • Last active: Jun 22 2016 09:14 PM
  • Joined: 18 Jun 2008
That's alright SKAN, thank you ;)

SKAN
  • Administrators
  • 9115 posts
  • Last active:
  • Joined: 26 Dec 2005
Code Updated: The wrapper was rewritten for AHK_Lw 32-bit compatibility.