text comparison proof of concept (WinMerge alternative)

Post your working scripts, libraries and tools for AHK v1.1 and older
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

text comparison proof of concept (WinMerge alternative)

11 Oct 2017, 12:10

This script is a proof of concept for text comparison (a WinMerge alternative).

What it does is look for the longest line that two strings have in common, and then look above/below it for additional adjacent lines that the strings also have in common. The script does this multiple times.

By doing this the script ends up with regions of lines in each string, alternating between matching and non-matching regions. Two htm files are produced accordingly, with text in the non-matching regions highlighted red, and fragment identifiers placed at the start of each red region. The htm files are opened with Internet Explorer and you can then press q or w to jump to the next/previous region.

In future I might try to do something like this with Scintilla controls instead of htm files.

I would welcome other people submitting similar scripts, either that use a similar approach, or a quite different approach, with the final goal of text comparison. I couldn't find anything else like this on the forums so far. Thanks for reading.

Code: Select all

;text comparison (WinMerge alternative)
;note: creates two htm files in the Desktop folder each time it's run

;the two texts to compare are read from fixed file names in the Desktop folder
vPath1 = %A_Desktop%\MyFile1.txt
vPath2 = %A_Desktop%\MyFile2.txt
FileRead, vText1, % vPath1
FileRead, vText2, % vPath2

;if nothing was read into vText1 (presumably the file is missing or empty - confirm),
;fall back to built-in sample text: 26 lines, a few of which end in 'z'
;NOTE: the lines inside the continuation section are data, not code
if (vText1 = "")
{
	vText1 = ;continuation section
	(LTrim
	aaaaa
	bbbbb
	ccccc
	dddddz
	eeeee
	fffff
	ggggg
	hhhhhz
	iiiii
	jjjjj
	kkkkk
	lllll
	mmmmmz
	nnnnn
	ooooo
	ppppp
	qqqqq
	rrrrr
	sssssz
	ttttt
	uuuuu
	vvvvv
	wwwww
	xxxxx
	yyyyy
	zzzzz
	)
	;append a second copy of the sample, so that every sample line occurs twice
	vText1 .= "`n" vText1
}

;same fallback for the second text: the sample differs from the first one by
;two extra 'dddddz' lines and by the changed lines fffffx, pppppx and xxxxxx
if (vText2 = "")
{
	vText2 = ;continuation section
	(LTrim
	aaaaa
	bbbbb
	ccccc
	dddddz
	dddddz
	dddddz
	eeeee
	fffffx
	ggggg
	hhhhhz
	iiiii
	jjjjj
	kkkkk
	lllll
	mmmmmz
	nnnnn
	ooooo
	pppppx
	qqqqq
	rrrrr
	sssssz
	ttttt
	uuuuu
	vvvvv
	wwwww
	xxxxxx
	yyyyy
	zzzzz
	)
	;append a second copy of the sample, so that every sample line occurs twice
	vText2 .= "`n" vText2
}

;==================================================

;MsgBox, % vText1
;MsgBox, % vText2
;normalise CRLF line endings to LF, so that each text can be split on LF alone
vText1 := StrReplace(vText1, "`r`n", "`n"), vText2 := StrReplace(vText2, "`r`n", "`n")
;oArray# - one key per line of the text
oArray1 := StrSplit(vText1, "`n"), oArray2 := StrSplit(vText2, "`n")
;oLen# - scratch arrays, created empty here
oLen1 := [], oLen2 := []
;oLen#X - one key per line of the text, holding the length of that line
oLen1X := [], oLen2X := []
for vLineNum, vLine in oArray1
	oLen1X[vLineNum] := StrLen(vLine)
for vLineNum, vLine in oArray2
	oLen2X[vLineNum] := StrLen(vLine)
;vList#X - comma-separated region summaries:
;e.g. 1-10 means lines 1 to 10 are yet to be checked,
;e.g. g11-20 or b11-20 means that the region has been checked
;initially each text is a single unchecked region covering all of its lines
vList1X := "1-" oArray1.Length(), vList2X := "1-" oArray2.Length()

;Main comparison loop.
;Each pass walks the region summaries of both texts in parallel. For every region
;that is still unchecked it looks for the longest line that both texts have in
;common inside that region, grows the match over the identical lines directly
;above/below it, and replaces the region by three regions:
;  b<before>  - lines before the match (not examined any further)
;  g<match>   - the matching lines
;  <after>    - lines after the match (examined on the next pass)
;A region in which no common line is found is marked b as a whole.
;The loop ends once a pass finds every region already marked b or g.
;Lines are compared as case-sensitive strings.
Loop
{
	vList1 := vList1X, vList2 := vList2X
	vList1X := "", vList2X := ""
	oList1 := StrSplit(vList1, ",")
	oList2 := StrSplit(vList2, ",")
	vCount := 0
	;loop through both region summaries at the same time
	Loop, % oList1.Length()
	{
		oLen1 := [], oLen2 := []
		vOrig1 := oList1[A_Index]
		vOrig2 := oList2[A_Index]
		;regions already marked b/g are finished: copy them through and count them
		if RegExMatch(vOrig1, "^[bg]")
		{
			vList1X .= vOrig1 ",", vList2X .= vOrig2 ",", vCount += 1
			continue
		}
		;===============
		;oLen#[-10] for example will list the indexes of every line in this region, with length 10
		;by using negative numbers, numbers with a bigger magnitude appear earlier in the for loop
		oTemp := StrSplit(vOrig1, "-")
		vOrig1A := oTemp.1, vOrig1B := oTemp.2
		vIndex := vOrig1A
		Loop, % vOrig1B - vOrig1A + 1
		{
			;store the index BEFORE advancing it, so that each line is filed under
			;its own length and no index outside the region is ever stored
			vLen := oLen1X[vIndex]
			if oLen1.HasKey(-vLen)
				oLen1[-vLen] .= "," vIndex
			else
				oLen1[-vLen] := vIndex
			vIndex++
		}
		oTemp := StrSplit(vOrig2, "-")
		vOrig2A := oTemp.1, vOrig2B := oTemp.2
		vIndex := vOrig2A
		Loop, % vOrig2B - vOrig2A + 1
		{
			vLen := oLen2X[vIndex]
			if oLen2.HasKey(-vLen)
				oLen2[-vLen] .= "," vIndex
			else
				oLen2[-vLen] := vIndex
			vIndex++
		}
		;===============
		;find the first pair of identical lines, trying the longest lengths first
		vDoBreak := 0
		vNum1 := vNum2 := ""
		for vKey, vValue in oLen1
		{
			;lines of the same length in the other text (blank if there are none)
			vValue2 := oLen2[vKey]
			Loop, Parse, vValue, % ","
			{
				vNum1 := A_LoopField
				Loop, Parse, vValue2, % ","
				{
					vNum2 := A_LoopField
					;the "_" prefix forces a string comparison: without it, lines that
					;look like numbers would be compared numerically (e.g. 1.0 and 1)
					;and '=' would additionally ignore case
					if (("_" . oArray1[vNum1]) == ("_" . oArray2[vNum2]))
					{
						vDoBreak := 1
						break
					}
				}
				if vDoBreak
					break
			}
			if vDoBreak
				break
		}
		;===============
		;no line in common: the whole region is a non-matching region
		if !vDoBreak
		{
			vList1X .= "b" vOrig1 ","
			vList2X .= "b" vOrig2 ","
			continue
		}
		vFinal1A := vFinal1B := vNum1
		vFinal2A := vFinal2B := vNum2
		;grow the match upwards, but never beyond the start of the current region
		;(otherwise it could overlap the region before it)
		while (vFinal1A > vOrig1A) && (vFinal2A > vOrig2A) && (("_" . oArray1[vFinal1A-1]) == ("_" . oArray2[vFinal2A-1]))
			vFinal1A -= 1, vFinal2A -= 1
		;grow the match downwards, but never beyond the end of the current region
		while (vFinal1B < vOrig1B) && (vFinal2B < vOrig2B) && (("_" . oArray1[vFinal1B+1]) == ("_" . oArray2[vFinal2B+1]))
			vFinal1B += 1, vFinal2B += 1

		;the lines before the match are always marked b (possibly an empty region, e.g. b1-0)
		vBefore1 := "b" vOrig1A "-" (vFinal1A-1)
		vBefore2 := "b" vOrig2A "-" (vFinal2A-1)
		vAfter1 := (vFinal1B+1) "-" vOrig1B
		vAfter2 := (vFinal2B+1) "-" vOrig2B

		;if the match reaches the end of the region in either text, nothing is left
		;to match against, so the remainder needs no further checking
		if (vFinal1B = vOrig1B) || (vFinal2B = vOrig2B)
			vAfter1 := "b" vAfter1, vAfter2 := "b" vAfter2

		vFinal1 := "g" vFinal1A "-" vFinal1B
		vFinal2 := "g" vFinal2A "-" vFinal2B

		vList1X .= vBefore1 "," vFinal1 "," vAfter1 ","
		vList2X .= vBefore2 "," vFinal2 "," vAfter2 ","

		;debugging aid: summary of the most recent split
		vPreview := "z " vFinal1 " " vFinal2
		. "`r`n" "z " vBefore1 " " vBefore2
		. "`r`n" "z " vAfter1 " " vAfter2
	}
	vList1X := RTrim(vList1X, ",")
	vList2X := RTrim(vList2X, ",")
	;every region was already marked at the start of this pass: finished
	if (vCount = oList1.Length())
		break
}

;show the finished region summaries of both texts
MsgBox, % vList1X "`r`n`r`n" vList2X

;strip the b/g markers, leaving plain 'first-last' ranges, one array key per region
vList1X := RegExReplace(vList1X, "[bg]"), vList2X := RegExReplace(vList2X, "[bg]")
oFinal1 := StrSplit(vList1X, ","), oFinal2 := StrSplit(vList2X, ",")
vOutput1 := vOutput2 := ""

;==============================
;disabled (never runs): plain-text output, one line of text per row,
;prefixed B for odd-numbered regions and G for even-numbered regions
if (0)
{
	Loop, % oFinal1.Length()
	{
		vPfx := Mod(A_Index, 2) ? "B" : "G"
		oTempA := StrSplit(oFinal1[A_Index], "-"), oTempB := StrSplit(oFinal2[A_Index], "-")
		vFirstA := oTempA.1, vFirstB := oTempB.1
		Loop, % oTempA.2 - vFirstA + 1
			vOutput1 .= vPfx "`t" oArray1[vFirstA+A_Index-1] "`r`n"
		Loop, % oTempB.2 - vFirstB + 1
			vOutput2 .= vPfx "`t" oArray2[vFirstB+A_Index-1] "`r`n"
	}
}
;==============================
;==============================

;Build one htm document per text.
;oFinal# holds the regions in order; odd-numbered regions are non-matching (B)
;and even-numbered regions are matching (G). Each B region is wrapped in a red
;span whose id is a running number, so that the URL fragments #1, #2, ... can be
;used to jump from one non-matching region to the next.
vOutput1 := ""
VarSetCapacity(vOutput1, StrLen(vText1)*2)
vOutput2 := ""
VarSetCapacity(vOutput2, StrLen(vText2)*2)
vOutput1 := "<!DOCTYPE HTML PUBLIC ""-//W3C//DTD HTML 4.0 Transitional//EN"">" "`r`n"
vOutput1 .= "<html>" "`r`n" "<body>" "`r`n"
vOutput2 := "<!DOCTYPE HTML PUBLIC ""-//W3C//DTD HTML 4.0 Transitional//EN"">" "`r`n"
vOutput2 .= "<html>" "`r`n" "<body>" "`r`n"

;optional text to insert at the start of each non-matching region (currently none)
vPfxX := "[DIFF]"
vPfxX := ""
;running number for the span ids: start from a known value rather than relying
;on the variable being blank
vID := 0
Loop, % oFinal1.Length()
{
	vPfx := (A_Index & 1) ? "B" : "G"
	oTempA := StrSplit(oFinal1[A_Index], "-")
	oTempB := StrSplit(oFinal2[A_Index], "-")
	if (vPfx = "B")
	{
		vID++
		vOutput1 .= "<span style=""background: #ff0000"" id=""" vID """>" vPfxX
		vOutput2 .= "<span style=""background: #ff0000"" id=""" vID """>" vPfxX
	}
	;an empty region (last < first) gives a loop count below 1, so nothing is added
	Loop, % oTempA.2 - oTempA.1 + 1
		vOutput1 .= JEE_StrToHtml(oArray1[oTempA.1-1+A_Index]) "<br>"
	Loop, % oTempB.2 - oTempB.1 + 1
		vOutput2 .= JEE_StrToHtml(oArray2[oTempB.1-1+A_Index]) "<br>"
	if (vPfx = "B")
	{
		vOutput1 .= "</span>"
		vOutput2 .= "</span>"
	}
}
;close both elements that were opened above (the html element was previously left open)
vOutput1 .= "</body>" "`r`n" "</html>" "`r`n"
vOutput2 .= "</body>" "`r`n" "</html>" "`r`n"

;Write the two htm documents to the Desktop, open each one in Internet Explorer
;and tile the two windows side by side.

;only needed by the commented-out WinMerge call below
vScriptDir = C:\Users\JE\Desktop\jee\ahk folder
vPathWinMerge = %vScriptDir%\Exe\+\WinMerge-2.14.0-exe\WinMergeU.exe
;JEE_WinMergeCompareStrings(vOutput1, vOutput2)

;the timestamp gives each run its own pair of files
vNow := A_Now
vPath1 = %A_Desktop%\z txt compare %vNow% 1.htm
vPath2 = %A_Desktop%\z txt compare %vNow% 2.htm
FileAppend, % vOutput1, % "*" vPath1, UTF-8
FileAppend, % vOutput2, % "*" vPath2, UTF-8
Run, iexplore.exe "%vPath1%"
Run, iexplore.exe "%vPath2%"
;wait for BOTH windows (previously only the second one was awaited, so the first
;handle could be retrieved before its window existed), and give up after 15
;seconds each instead of waiting forever
;assumes each window title starts with the file path - TODO confirm
WinWait, %vPath1% ahk_class IEFrame,, 15
if ErrorLevel
	return
WinWait, %vPath2% ahk_class IEFrame,, 15
if ErrorLevel
	return
WinGet, hWnd1, ID, %vPath1% ahk_class IEFrame
WinGet, hWnd2, ID, %vPath2% ahk_class IEFrame

;tile the windows left/right within the work area of the primary monitor
;(the area not covered by the taskbar, wherever the taskbar is docked)
SysGet, vWork, MonitorWorkArea
vWinW := (vWorkRight - vWorkLeft) // 2
vWinH := vWorkBottom - vWorkTop
WinMove, % "ahk_id " hWnd1,, % vWorkLeft, % vWorkTop, % vWinW, % vWinH
WinMove, % "ahk_id " hWnd2,, % vWorkLeft + vWinW, % vWorkTop, % vWinW, % vWinH
return

;==================================================

;q/w: move both Internet Explorer windows to the next/previous fragment identifier.
;The current number is read from the URL of the first window in the list (1 if the
;URL has no numeric fragment), changed by one, and both windows are navigated to
;that fragment.
;The hotkeys are only active while an Internet Explorer window is active, so that
;q and w can still be typed normally everywhere else.
#IfWinActive ahk_class IEFrame
q:: ;text comparison - navigate to next fragment identifier
w:: ;text comparison - navigate to previous fragment identifier
WinGet, vWinList, List, ahk_class IEFrame
;two browser windows are needed
if (vWinList < 2)
	return
hWnd1 := vWinList1
hWnd2 := vWinList2
oWB1 := WBGet("ahk_id " hWnd1)
oWB2 := WBGet("ahk_id " hWnd2)
;WBGet returns nothing if it could not obtain the browser object
if !IsObject(oWB1) || !IsObject(oWB2)
{
	oWB1 := ""
	oWB2 := ""
	return
}
vUrl1 := oWB1.document.url
vUrl2 := oWB2.document.url
oTemp1 := StrSplit(vUrl1, "#")
oTemp2 := StrSplit(vUrl2, "#")
;a missing or non-numeric fragment counts as position 1
vNum := RegExMatch(oTemp1.2, "^\d+$") ? oTemp1.2 : 1
if InStr(A_ThisHotkey, "w")
	vNum--
else
	vNum++
;the fragment identifiers start at 1
if (vNum < 1)
	vNum := 1
vUrl1X := oTemp1.1 "#" vNum
vUrl2X := oTemp2.1 "#" vNum
MsgBox, % vUrl1X "`r`n" vUrl2X
oWB1.Navigate(vUrl1X)
oWB2.Navigate(vUrl2X)
;release the COM objects
oWB1 := ""
oWB2 := ""
return
#IfWinActive

;==================================================

;JEE_StrToHtml - returns vText converted by the built-in 'Transform, HTML' command
;vText: the text to convert
JEE_StrToHtml(vText)
{
	vConverted := ""
	Transform, vConverted, HTML, % vText
	return vConverted
}

;==================================================

;Basic Webpage Controls with JavaScript / COM - Tutorial - Tutorials - AutoHotkey Community
;https://autohotkey.com/board/topic/47052-basic-webpage-controls-with-javascript-com-tutorial/

;WBGet - returns a COM object for the browser hosted in an Internet Explorer_Server control
;WinTitle: the window that contains the control (default: any window of class IEFrame)
;Svr#: number appended to the control class name, to choose which
;      Internet Explorer_Server control of the window is used (default: 1)
;return value: the wrapped COM object; nothing is returned if the message
;              could not be sent or if CLSIDFromString reports failure
WBGet(WinTitle="ahk_class IEFrame", Svr#=1) {               ;// based on ComObjQuery docs
   static msg := DllCall("RegisterWindowMessage", "str", "WM_HTML_GETOBJECT")
        , IID := "{0002DF05-0000-0000-C000-000000000046}"   ;// IID_IWebBrowserApp
;//     , IID := "{332C4427-26CB-11D0-B483-00C04FD90119}"   ;// IID_IHTMLWindow2
   ;// send the registered message to the control; the reply arrives in ErrorLevel
   SendMessage msg, 0, 0, Internet Explorer_Server%Svr#%, %WinTitle%
   if (ErrorLevel != "FAIL") {
      ;// keep the reply, and reserve 16 zero-filled bytes to receive the binary GUID
      lResult:=ErrorLevel, VarSetCapacity(GUID,16,0)
      ;// convert the interface id from text to binary form
      ;// NOTE(review): this id is presumably IID_IHTMLDocument2 - confirm
      if DllCall("ole32\CLSIDFromString", "wstr","{332C4425-26CB-11D0-B483-00C04FD90119}", "ptr",&GUID) >= 0 {
         ;// turn the message reply into an interface pointer (pdoc)
         DllCall("oleacc\ObjectFromLresult", "ptr",lResult, "ptr",&GUID, "ptr",0, "ptr*",pdoc)
         ;// query pdoc using IID as both service id and interface id, wrap the
         ;// result as a COM object to return, then release pdoc
         return ComObj(9,ComObjQuery(pdoc,IID,IID),1), ObjRelease(pdoc)
      }
   }
}

;==================================================
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: text comparison proof of concept (WinMerge alternative)

14 Oct 2017, 18:18

I tried it the other day, it seemed to work. Cool :thumbup:

Code: Select all

Transform, vHtml, HTML, % vText
I didn't know, fancy stuff.
Thanks for sharing, cheers.

Return to “Scripts and Functions (v1)”

Who is online

Users browsing this forum: Drake, Google [Bot] and 179 guests