Sort function + extra features

Post your working scripts, libraries and tools
User avatar
jeeswg
Posts: 5112
Joined: 19 Dec 2016, 01:58
Location: UK

Sort function + extra features

Post by jeeswg » 10 Jun 2018, 17:01

- I was wanting to create a variant of the Sort function.
- To support various features:
- bound functions
- linear arrays
- null sort (my name for a sort that leaves the existing order unchanged: the equivalent of a 3-parameter comparison function that returns -offset)
- remove duplicates: all/adjacent
- remove duplicates without sorting the list
- reverse a list without sorting it
- sort by string length
- sort by the nth column
- sort case insensitive 'upper' (as though strings were converted to upper case before comparison) (the default is to convert to lower case prior to comparison)
- sort individual characters (no delimiter character) (e.g. alphabetise/randomise/reverse strings)
- sort reverse order special (swap if different, else maintain order)
- stable sort

- I mentioned some Sort command queries here:
Sort: Z mode and CRLF handling - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=5&t=50364

- Here are some examples, and a prototype function (and an auxiliary 'filter' function):
- Note: the two functions haven't been tested extensively.

Code: Select all

;demo hotkey: press q to step through a series of message boxes, each showing the result of one SortNew call
q:: ;Sort examples
;--- XD: no delimiter, each character of the string is treated as an item ---
vText := "ABCabc"
MsgBox, % SortNew(vText, "XD") ;alphabetise string
MsgBox, % SortNew(vText, "XD R") ;reverse alphabetical order (if two items are the same, swap them)
MsgBox, % SortNew(vText, "XD XR") ;reverse alphabetical order (if two items are the same, maintain their order)
MsgBox, % SortNew(vText, "XD XN R") ;reverse string
Loop, 5
	MsgBox, % "random: " SortNew(vText, "XD Random") ;randomise string

;--- XCNn: sort lines by the nth column (columns are tab-separated by default) ---
vText := " ;continuation section
(
a	q	3
b	w	02
c	e	1
)"
MsgBox, % SortNew(vText, "XCN2") ;sort by col 2
MsgBox, % SortNew(vText, "XCN2 R") ;sort by col 2 (reverse)
MsgBox, % SortNew(vText, "XCN3") ;sort by col 3
MsgBox, % SortNew(vText, "XCN3 R") ;sort by col 3 (reverse)
MsgBox, % SortNew(vText, "XCN1") ;sort by col 1
MsgBox, % SortNew(vText, "XCN1 R") ;sort by col 1 (reverse)
MsgBox, % SortNew(vText, "XCN1 N") ;sort by col 1 (numeric)
MsgBox, % SortNew(vText, "XCN1 R N") ;sort by col 1 (reverse + numeric)

;--- XL: sort comma-separated items by string length ---
vText := "aaa,bb,cccc,d,eeeee"
MsgBox, % SortNew(vText, "D, XL") ;sort by length

;note: XN is the null sort, which means that the order is maintained (unchanged)

;--- U: remove all duplicates (combined with XN so that the item order is kept) ---
vText := "c,b,a,c,b,a"
MsgBox, % SortNew(vText, "D, XN U") ;remove duplicates, case insensitive

vText := "c,b,a,C,B,A,c,b,a,C,B,A"
MsgBox, % SortNew(vText, "D, XN U C") ;remove duplicates, case sensitive

;note: with the N option SortNew treats every non-numeric item as 0, so in the all-letters list below every item counts as a duplicate of the first one
vText := "c,b,a,C,B,A,c,b,a,C,B,A"
MsgBox, % SortNew(vText, "D, XN U N") ;remove duplicates, numeric
vText := "c,b,a,3,2,1,c,b,a,3,2,1"
MsgBox, % SortNew(vText, "D, XN U N") ;remove duplicates, numeric
MsgBox, % SortNew(vText, "D, U N") ;(sort numeric and) remove duplicates, numeric

;--- XUA: remove only duplicates that are adjacent to each other ---
vText := "c,c,b,a,c,c,b,a"
MsgBox, % SortNew(vText, "D, XN XUA") ;remove adjacent duplicates, case insensitive
vText := "c,c,b,a,c,C,b,a"
MsgBox, % SortNew(vText, "D, XN XUA C") ;remove adjacent duplicates, case sensitive

;--- U versus XUA on the same mixed letters/numbers list, with and without sorting first ---
vText := "c,b,a,3,2,1,c,b,a,3,2,1"
MsgBox, % SortNew(vText, "D, XN U N") ;remove duplicates, numeric
MsgBox, % SortNew(vText, "D, U N") ;(sort numeric and) remove duplicates, numeric
MsgBox, % SortNew(vText, "D, XN XUA N") ;remove adjacent duplicates, numeric
MsgBox, % SortNew(vText, "D, XUA N") ;(sort numeric and) remove adjacent duplicates, numeric

;--- build a string of characters 1 to 127, then compare the two case-insensitive orderings ---
vText := ""
Loop, 127
	vText .= Chr(A_Index)
MsgBox, % SortNew(vText, "XD") ;case insensitive (convert to lower case then compare)
MsgBox, % SortNew(vText, "XD XCIU") ;case insensitive (convert to upper case then compare)
return

;==================================================

;original options: C CL Dx F N Pn R Random U Z \
;new option: XCDx ;column delimiter (e.g. XCD,), (default: tab), specify the delimiter for the 'sort by the nth column' option
;new option: XCIU ;case-insensitive comparison, sorting is case insensitive (as though strings were converted to upper case before comparison)
;new option: XCNn ;column number (e.g. XCN2), sort by the nth column
;new option: XD ;no delimiter, sort individual characters
;new option: XL ;sort items by string length
;new option: XN ;null sort, e.g. to reverse items without sorting them
;new option: XR ;reverse special (swap if different, else maintain order)
;new option: XS/XS1 ;sort stable
;new option: XU/XS0 ;sort unstable
;new option: XUA ;remove duplicates (adjacent duplicates only)
;note: by default, i.e. if C/CL are omitted, sorting is case insensitive (as though strings were converted to lower case before comparison)
;note: for the U option, all duplicates are removed not just adjacent duplicates (duplicates are determined based on the C/CL/N options)
;note: vText can be a linear array (and the function will return a linear array)
;note: vFunc can be a function name or a bound func
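;warning: if U and CL are both specified, the CL is ignored when removing duplicates (the list is still sorted using CL, but duplicates are then matched case-insensitively without locale rules)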
global AX_SortStable
;SortNew: sorts a delimited list with the built-in Sort command, using SortNew_Filter as the comparison callback, then optionally removes duplicates
;vText: the list to sort, either a string or a linear array (an array is joined with an unused character, sorted, and split back into an array)
;vOpt: space/tab-separated options: the standard Sort options (C CL Dx F N Pn R Random U \) plus the X-prefixed extras (XCDx XCIU XCNn XD XL XN XR XS/XS1 XU/XS0 XUA)
;vFunc: optional custom comparison function (function name or func object), called as vFunc(vTextA, vTextB, vOffset); an 'F name' pair in vOpt overrides it
;return: the processed list (string in, string out; array in, array out); blank if SortNew_Filter does not exist
;note: the settings are handed to SortNew_Filter through the vGblSort* global variables, which every call overwrites
;note: the caller's StringCaseSense mode is saved on entry and restored before returning
SortNew(vText, vOpt:="", vFunc:="")
{
	global vGblSortByLen, vGblSortCasInsU, vGblSortCol, vGblSortColDelim, vGblSortFunc, vGblSortFuncType, vGblSortNull, vGblSortNum, vGblSortPos, vGblSortRev, vGblSortRandom, vGblSortStable
	static vIsFunc := IsFunc("SortNew_Filter")
	static number := "number"
	if !vIsFunc
	{
		MsgBox("error: SortNew_Filter function not found")
		return
	}
	;remember the caller's StringCaseSense mode so that it can be restored on exit
	vSCSOrig := A_StringCaseSense

	;defaults (reset on every call because the globals persist between calls)
	vDoSkipLastDelim := 1
	vGblSortCol := 0 ;(partial string)
	vGblSortColDelim := "`t" ;(partial string)
	vDelim := "`n"
	vGblSortByLen := 0
	vGblSortCasInsU := 0
	vGblSortFunc := vFunc
	vGblSortFuncType := ""
	vGblSortNull := 0
	vGblSortNum := 0
	vGblSortPos := 0 ;(partial string)
	vGblSortRandom := 0
	vGblSortRev := 0
	vGblSortStable := (AX_SortStable = "") ? 1 : AX_SortStable

	;a delimiter that is itself a space/tab cannot survive the space/tab-delimited parsing loop below, so capture it here
	vSCS := "Off"
	if vPos := RegExMatch(vOpt, "i)(?<=^| |`t)D(?= |`t)")
		vDelim := SubStr(vOpt, vPos+1, 1)
	if vPos := RegExMatch(vOpt, "i)(?<=^| |`t)XCD(?= |`t)")
		vGblSortColDelim := SubStr(vOpt, vPos+3, 1)

	Loop, Parse, vOpt, % " `t"
	{
		if (vTempLast = "F") ;the field that follows 'F' is the name of a custom comparison function
			vGblSortFunc := A_LoopField, vGblSortFuncType := "N"
		else if (A_LoopField = "C")
			vSCS := "On"
		else if (A_LoopField = "CL")
			vSCS := "Locale"
		else if (SubStr(A_LoopField, 1, 1) = "D")
		{
			;a bare 'D' means the delimiter is a space/tab (already captured above), do not overwrite it with a blank string
			if (StrLen(A_LoopField) > 1)
				vDelim := SubStr(A_LoopField, 2)
		}
		else if (A_LoopField = "N")
			vGblSortNum := 1
		else if (SubStr(A_LoopField, 1, 1) = "P")
			vGblSortPos := SubStr(A_LoopField, 2)
		else if (A_LoopField = "R")
			vGblSortRev := 1
		else if (A_LoopField = "Random")
			vGblSortRandom := 1
		else if (A_LoopField = "U")
			vDoRemoveDups := "All"
		else if (A_LoopField = "\")
			vGblSortCol := -1, vGblSortColDelim := "\"
		else if (SubStr(A_LoopField, 1, 3) = "XCD")
		{
			;a bare 'XCD' means the column delimiter is a space/tab (already captured above)
			if (StrLen(A_LoopField) > 3)
				vGblSortColDelim := SubStr(A_LoopField, 4)
		}
		else if (A_LoopField = "XCIU")
			vGblSortCasInsU := 1
		else if (SubStr(A_LoopField, 1, 3) = "XCN")
			vGblSortCol := SubStr(A_LoopField, 4)
		else if (A_LoopField = "XD") ;no delimiter
			vNoDelim := 1
		else if (A_LoopField = "XL") ;sort by length
			vGblSortByLen := 1
		else if (A_LoopField = "XN") ;null sort
			vGblSortNull := 1
		else if (A_LoopField = "XR")
			vGblSortRev := -1
		else if (A_LoopField = "XUA")
			vDoRemoveDups := "Adj"
		else if (A_LoopField = "XS") ;sort stable
		|| (A_LoopField = "XS1") ;sort stable
			vGblSortStable := 1
		else if (A_LoopField = "XU") ;sort unstable
		|| (A_LoopField = "XS0") ;sort unstable
			vGblSortStable := 0
		vTempLast := A_LoopField
	}

	StringCaseSense(vSCS)

	if !vGblSortFuncType
	{
		if IsObject(vFunc)
			vGblSortFuncType := "O" ;object
		else if !(vFunc = "")
			vGblSortFuncType := "N" ;name
	}

	if IsObject(vText)
	{
		;join the array using a character that appears in none of its items
		oArray := vText
		vDelim := JEE_StrUnused(1, oArray*)
		vText := JEE_StrJoin(vDelim, oArray*)
		vIsObj := 1
	}
	else if vNoDelim
	{
		;insert an unused character between every pair of characters so that each character becomes an item
		vDelim := JEE_StrUnused(1, vText)
		vText2 := vText
		VarSetCapacity(vText, StrLen(vText)*2*2)
		Loop, Parse, vText2
			vText .= A_LoopField vDelim
		vText := SubStr(vText, 1, -StrLen(vDelim))
	}

	;normalise CRLF to LF first, so that a trailing CRLF is seen as a trailing delimiter and no stray CR is left on the last item
	if (vDelim = "`n")
		vText := StrReplace(vText, "`r`n", "`n", vCountRN)

	;set a trailing delimiter aside (it is put back after sorting/deduplicating), otherwise the duplicate-removal loops would see a blank final item
	;'==' is used so that the last character must match the delimiter exactly (case sensitive)
	if vDoSkipLastDelim && StrLen(vText) && StrLen(vDelim)
	&& (SubStr(vText, StrLen(vText)) == vDelim)
		vDoRestoreLastDelim := 1, vText := SubStr(vText, 1, -1)

	;XR handles ties itself (inside SortNew_Filter), so the ordinary tie-break must be disabled
	if (vGblSortRev = -1)
		vGblSortStable := 0

	;for XCIU the callback upper-cases both items, then they are compared case sensitively
	if vGblSortCasInsU
		StringCaseSense("On")
	vText := Sort(vText, "D" vDelim " F SortNew_Filter")
	if vGblSortCasInsU
		StringCaseSense(vSCS)

	if (vDoRemoveDups = "All") && vGblSortNum
	{
		;numeric: non-numeric items all count as 0
		VarSetCapacity(vText2, StrLen(vText)*2)
		oTemp := {}
		Loop, Parse, vText, % vDelim
		{
			if A_LoopField is number
				vTemp := A_LoopField
			else
				vTemp := 0
			if !oTemp.HasKey("z" vTemp)
				oTemp["z" vTemp] := 1
				, vText2 .= A_LoopField vDelim
		}
		vText := SubStr(vText2, 1, -StrLen(vDelim))
	}
	else if (vDoRemoveDups = "All") && ((vSCS = "On") || (vSCS = 1))
	{
		;case sensitive: AHK v1 object keys are case insensitive, so a Scripting.Dictionary is used instead
		VarSetCapacity(vText2, StrLen(vText)*2)
		oTemp := ComObjCreate("Scripting.Dictionary")
		Loop, Parse, vText, % vDelim
		{
			if !oTemp.Exists("z" A_LoopField)
				oTemp.Item("z" A_LoopField) := 1
				, vText2 .= A_LoopField vDelim
		}
		vText := SubStr(vText2, 1, -StrLen(vDelim))
	}
	else if (vDoRemoveDups = "All")
	{
		;case insensitive (this branch is also used when CL was specified)
		VarSetCapacity(vText2, StrLen(vText)*2)
		oTemp := {}
		Loop, Parse, vText, % vDelim
		{
			if !oTemp.HasKey("z" A_LoopField)
				oTemp["z" A_LoopField] := 1
				, vText2 .= A_LoopField vDelim
		}
		vText := SubStr(vText2, 1, -StrLen(vDelim))
	}
	else if (vDoRemoveDups = "Adj")
	{
		;the delimiter can never be equal to an item, so it serves as the initial 'previous item'
		vLast := vDelim
		VarSetCapacity(vText2, StrLen(vText)*2)
		if vGblSortNum
			Loop, Parse, vText, % vDelim
			{
				if A_LoopField is number
					vTemp := A_LoopField
				else
					vTemp := 0
				if !(vLast = vTemp)
					vText2 .= A_LoopField vDelim
				vLast := vTemp
			}
		else
			Loop, Parse, vText, % vDelim
			{
				;the concatenation with "" forces a string comparison, != obeys the current StringCaseSense mode
				if (vLast "" != A_LoopField)
					vText2 .= A_LoopField vDelim
				vLast := A_LoopField
			}
		vText := SubStr(vText2, 1, -StrLen(vDelim))
	}

	if vDoRestoreLastDelim
		vText .= vDelim

	;build the return value before restoring the caller's StringCaseSense mode, because StrReplace obeys that mode
	if vIsObj
		vRet := StrSplit(vText, vDelim)
	else
	{
		if vNoDelim
			vText := StrReplace(vText, vDelim)
		if vCountRN
			vText := StrReplace(vText, "`n", "`r`n")
		vRet := vText
	}
	StringCaseSense(vSCSOrig)
	return vRet
}

;SortNew_Filter: comparison callback that SortNew passes to the Sort command (F option)
;vTextA/vTextB: the two items to compare
;vOffset: the third argument supplied by the Sort command; -vOffset is returned on ties (and always for the null sort)
;return: 1 if item A compares greater than item B, -1 if smaller, the tie value otherwise (the sign is flipped for the R/XR options); for the Random option a random -1 or 1
;note: all settings are read from the vGblSort* globals that SortNew sets before sorting
SortNew_Filter(vTextA, vTextB, vOffset)
{
	global vGblSortByLen, vGblSortCasInsU, vGblSortCol, vGblSortColDelim, vGblSortFunc, vGblSortFuncType, vGblSortNull, vGblSortNum, vGblSortPos, vGblSortRev, vGblSortRandom, vGblSortStable
	static number := "number"

	if vGblSortCasInsU
		vTextA := StrUpper(vTextA)
		, vTextB := StrUpper(vTextB)
	;XR: keep the real offset, since vOffset may be zeroed just below
	if (vGblSortRev = -1)
		vOffset2 := vOffset
	if !vGblSortStable
		vOffset := 0
	if vGblSortRandom
		return Random(0, 1) ? -1 : 1

	;reduce each item to its sort key: the nth column, counted from the start (positive) or from the end (negative, -1 = last column)
	if (vGblSortCol > 0)
		vTextA := StrSplit(vTextA, vGblSortColDelim)[vGblSortCol]
		, vTextB := StrSplit(vTextB, vGblSortColDelim)[vGblSortCol]
	else if (vGblSortCol < 0)
		oTempA := StrSplit(vTextA, vGblSortColDelim)
		, oTempB := StrSplit(vTextB, vGblSortColDelim)
		, vTextA := oTempA[oTempA.Length()+vGblSortCol+1]
		, vTextB := oTempB[oTempB.Length()+vGblSortCol+1]
	if vGblSortPos ;if positive or negative
		vTextA := JEE_SubStr(vTextA, vGblSortPos)
		, vTextB := JEE_SubStr(vTextB, vGblSortPos)

	if vGblSortNull
		vRet := -vOffset
	else if vGblSortFuncType
		vRet := %vGblSortFunc%(vTextA, vTextB, vOffset)
	else if vGblSortByLen
		vLenA := StrLen(vTextA), vLenB := StrLen(vTextB)
		, vRet := (vLenA > vLenB) ? 1 : (vLenA < vLenB) ? -1 : -vOffset
	else if vGblSortNum
	{
		;non-numeric items are compared as 0
		if vTextA is not number
			vTextA := 0
		if vTextB is not number
			vTextB := 0
		vRet := (vTextA > vTextB) ? 1 : (vTextA < vTextB) ? -1 : -vOffset
	}
	else ;the leading "" forces a string comparison even if both items look numeric
		vRet := ("" vTextA > vTextB) ? 1 : ("" vTextA < vTextB) ? -1 : -vOffset

	if vGblSortRev
	{
		if vRet
			vRet *= -1
		else if (vGblSortRev = -1) ;XR tie: fall back to the saved offset so that equal items keep their order
			vRet := -vOffset2
	}
	return vRet
}

;==================================================

; ;e.g.
; oArray := StrSplit("abcdefghijklmnopqrstuvwxyz")
; MsgBox, % JEE_StrJoin(" - ", oArray*)
; MsgBox, % JEE_StrJoin(["=","`r`n"], oArray*)
; MsgBox, % JEE_StrJoin(["`t","`r`n"], oArray*)
; MsgBox, % JEE_StrJoin(["`t","`t","`r`n"], oArray*)
; MsgBox, % JEE_StrJoin(["`t","`t","`t","`r`n"], oArray*)
; MsgBox, % JEE_StrJoin(["`t","`t","`t","`t","`r`n"], oArray*)
; MsgBox, % JEE_StrJoin(["","","","","`r`n"], oArray*)

;joins the items of a variadic list into one string
;vSep: a separator string, or an array of separators that are used in rotation (1st, 2nd, ..., last, 1st, ...)
;oArray*: the items to join
;return: the joined string (no separator is added after the final item)
JEE_StrJoin(vSep, oArray*)
{
	vLast := oArray.MaxIndex()
	VarSetCapacity(vJoined, oArray.Length()*200*2)
	;a 1-item separator array is handled like a plain separator
	if IsObject(vSep) && (vSep.Length() = 1)
		vSep := vSep.1
	if IsObject(vSep)
	{
		;rotate through the separators: item n is followed by separator ((n-1) mod count)+1
		vSepCount := vSep.Length()
		Loop, % vLast-1
			vJoined .= oArray[A_Index] vSep[Mod(A_Index-1, vSepCount)+1]
	}
	else
	{
		Loop, % vLast-1
			vJoined .= oArray[A_Index] vSep
	}
	;the final item is appended without a trailing separator
	return vJoined oArray[vLast]
}

;==================================================

;finds characters that appear in none of the specified strings
;vNum: the number of unused characters wanted
;oArray*: the strings to check
;return: a string made of the first vNum unused characters, trying Chr(1) to Chr(65535) in order (shorter if fewer are found)
JEE_StrUnused(vNum, oArray*)
{
	;merge all the strings into one, so that each candidate character needs only one InStr call
	VarSetCapacity(vHaystack, 1000*oArray.Length()*2)
	Loop, % oArray.Length()
		vHaystack .= oArray[A_Index]
	vFound := 0
	Loop, 65535
	{
		vChar := Chr(A_Index)
		if InStr(vHaystack, vChar)
			continue
		vUnused .= vChar
		if (++vFound = vNum)
			break
	}
	return vUnused
}

;==================================================

;works like SubStr (AHK v2) (on AHK v1/v2)
;a SubStr that uses the AHK v2 convention for negative positions (-1 = last character) on both AHK v1 and v2
;vText: the source string
;vPos: the start position; 0 gives a blank result
;vLen: optional length; if omitted (blank), the rest of the string is returned
JEE_SubStr(ByRef vText, vPos, vLen:="")
{
	;in AHK v1 SubStr(1, 0) returns the last character ('1'), which is how v1 is detected here
	static vIsV1 := !!SubStr(1, 0)
	if (vPos = 0)
		return
	;AHK v1 counts positions from the end starting at 0, so shift negative positions by one
	if (vPos <= -1) && vIsV1
		vPos++
	return (vLen = "") ? SubStr(vText, vPos) : SubStr(vText, vPos, vLen)
}

;==================================================

;commands as functions (AHK v2 functions for AHK v1) - AutoHotkey Community
;https://autohotkey.com/boards/viewtopic.php?f=37&t=29689

;function wrapper around the MsgBox command
;Params.1: the text to show (if no parameters at all are passed: "Press OK to continue.")
;Params.2: the title (if absent: A_ScriptName; an empty title is replaced by a single space)
;Params.3: space/tab-separated options: a number, a name listed in TypeArray (e.g. YesNo, Icon!, Default2), or Tn for a timeout of n seconds
;return: the first name in the result list (Timeout, OK, Cancel, Yes, ...) for which IfMsgBox is true
MsgBox(Params*) ;Text, Title, Options
{
    local Match, Options, Result, Temp, Text, Timeout, Title, Type
    ;option name -> numeric value that is OR-ed into the MsgBox type number
    static TypeArray := {"OK":0, "O":0, "OKCancel":1, "O/C":1, "OC":1, "AbortRetryIgnore":2, "A/R/I":2, "ARI":2
        , "YesNoCancel":3, "Y/N/C":3, "YNC":3, "YesNo":4, "Y/N":4, "YN":4, "RetryCancel":5, "R/C":5, "RC":5
        , "CancelTryAgainContinue":6, "C/T/C":6, "CTC":6, "Iconx":16, "Icon?":32, "Icon!":48, "Iconi":64
        , "Default2":256, "Default3":512, "Default4":768}

    Text := !Params.Length() ? "Press OK to continue." : Params.HasKey(1) ? Params.1 : ""
    Title := !Params.HasKey(2) ? A_ScriptName : (Params.2 = "") ? " " : Params.2
    Options := Params.3, Timeout := "", Type := 0
    if (Options)
    {
        ;for each option: a non-zero number or a known TypeArray name is OR-ed into Type,
        ;otherwise a token of the form T<number> sets the timeout, anything else is ignored
        Loop, Parse, Options, % " `t"
            (Temp := Abs(A_LoopField)) || (Temp := TypeArray[A_LoopField]) ? (Type |= Temp)
                : RegExMatch(A_LoopField, "Oi)^T(\d+\.?\d*)$", Match) ? Timeout := Match.1
                : 0
    }
    MsgBox % Type, % Title, % Text, % Timeout
    ;try each possible result name in turn, stopping at the one that IfMsgBox confirms
    Loop Parse, % "Timeout,OK,Cancel,Yes,No,Abort,Ignore,Retry,Continue,TryAgain", % ","
        IfMsgBox % Result := A_LoopField
            break
    return Result
}
;function wrapper around the Random command: passes Min and Max to the command and returns the generated number
Random(Min:="", Max:="")
{
    local OutputVar
    Random OutputVar, %Min%, %Max%
    return OutputVar
}
;function wrapper around the Sort command: sorts String (a local copy, since the parameter is not ByRef) using Options and returns the result
Sort(String, Options:="")
{
    Sort String, %Options%
    return String
}
;function wrapper around the StringCaseSense command: passes OnOffLocale (SortNew uses On, Off and Locale) straight to the command; returns nothing
StringCaseSense(OnOffLocale)
{
    StringCaseSense %OnOffLocale%
}
;function wrapper around the StringUpper command: returns an upper-case copy of String
;T: passed through unchanged as the command's third parameter
StrUpper(String, T:="")
{
    local OutputVar
    StringUpper OutputVar, String, %T%
    return OutputVar
}

;==================================================

Return to “Scripts and Functions”