|  |  |  | @@ -3,7 +3,8 @@ | 
		
	
		
			
				|  |  |  |  | ## | 
		
	
		
			
				|  |  |  |  | ## Utility to compare the file contents of two directory trees. | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | import os, tables, streams, sequtils, strutils, docopt, marshal | 
		
	
		
			
				|  |  |  |  | import std/[json, jsonutils, os, tables, sequtils, strutils] | 
		
	
		
			
				|  |  |  |  | import docopt | 
		
	
		
			
				|  |  |  |  | import incremental_md5, console_progress | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | import ./cliconstants | 
		
	
	
		
			
				
					
					|  |  |  | @@ -69,94 +70,116 @@ proc getRelPath(ancestor, child: string): string = | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | type | 
		
	
		
			
				|  |  |  |  |   FileEntry* = tuple[relPath: string, checksum: string] | 
		
	
		
			
				|  |  |  |  |   FileEntry* = ref tuple[relPath: string, checksum: string] | 
		
	
		
			
				|  |  |  |  |     ## Data about one file that has been analyzed | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   DirAnalysis* = ## Analysis data about one directory tree. | 
		
	
		
			
				|  |  |  |  |     tuple[allEntries: seq[ref FileEntry], | 
		
	
		
			
				|  |  |  |  |           byRelPath: ref Table[string, ref FileEntry], | 
		
	
		
			
				|  |  |  |  |           byChecksum: ref Table[string, seq[ref FileEntry]]] | 
		
	
		
			
				|  |  |  |  |     tuple[allEntries: seq[FileEntry], | 
		
	
		
			
				|  |  |  |  |           byRelPath: TableRef[string, FileEntry], | 
		
	
		
			
				|  |  |  |  |           byChecksum: TableRef[string, seq[FileEntry]]] | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   DisplayOptions = tuple[left, right, same, content, path: bool] | 
		
	
		
			
				|  |  |  |  |     ## Consolidated description of which types of results to display. | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | func `$`(f: FileEntry): string = f.checksum & ": " & f.relPath | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc getOrFail(n: JsonNode, key: string, objName: string = ""): JsonNode = | 
		
	
		
			
				|  |  |  |  |   ## convenience method to get a key from a JObject or raise an exception | 
		
	
		
			
				|  |  |  |  |   if not n.hasKey(key): raise newException(Exception, objName & " missing key '" & key & "'") | 
		
	
		
			
				|  |  |  |  |   return n[key] | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc getIfExists(n: JsonNode, key: string): JsonNode = | 
		
	
		
			
				|  |  |  |  |   ## convenience method to get a key from a JObject or return null | 
		
	
		
			
				|  |  |  |  |   result = if n.hasKey(key): n[key] | 
		
	
		
			
				|  |  |  |  |            else: newJNull() | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | func parseFileEntry(n: JsonNode): FileEntry = | 
		
	
		
			
				|  |  |  |  |   result = new(FileEntry) | 
		
	
		
			
				|  |  |  |  |   result.relPath = n.getOrFail("relPath").getStr | 
		
	
		
			
				|  |  |  |  |   result.checksum = n.getOrFail("checksum").getStr | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | func initDirAnalysis(): DirAnalysis = | 
		
	
		
			
				|  |  |  |  |   (allEntries: @[], | 
		
	
		
			
				|  |  |  |  |    byRelPath: newTable[string, FileEntry](), | 
		
	
		
			
				|  |  |  |  |    byChecksum: newTable[string, seq[FileEntry]]()) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | func indexEntries(da: var DirAnalysis) = | 
		
	
		
			
				|  |  |  |  |   for e in da.allEntries: | 
		
	
		
			
				|  |  |  |  |     da.byRelPath[e.relPath] = e | 
		
	
		
			
				|  |  |  |  |     if not da.byChecksum.hasKey(e.checksum): | 
		
	
		
			
				|  |  |  |  |       da.byChecksum[e.checksum] = newSeq[FileEntry]() | 
		
	
		
			
				|  |  |  |  |     da.byChecksum[e.checksum].add(e) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis = | 
		
	
		
			
				|  |  |  |  |   ## Inspect a directory and analyze all files, noting their relative paths and | 
		
	
		
			
				|  |  |  |  |   ## checksum of their contents. | 
		
	
		
			
				|  |  |  |  |   let fileCount = countFiles(root) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   progress.init(root, fileCount) | 
		
	
		
			
				|  |  |  |  |   progress.init(root, fileCount + 10) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   result = (allEntries: @[], | 
		
	
		
			
				|  |  |  |  |             byRelPath: newTable[string, ref FileEntry](), | 
		
	
		
			
				|  |  |  |  |             byChecksum: newTable[string, seq[ref FileEntry]]()) | 
		
	
		
			
				|  |  |  |  |   result = initDirAnalysis() | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   var count = 0 | 
		
	
		
			
				|  |  |  |  |   for file in walkDirRec(root): | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     # Compute checksum | 
		
	
		
			
				|  |  |  |  |     let md5sum = fileToMd5(file) | 
		
	
		
			
				|  |  |  |  |     var fileEntry: ref FileEntry = new(ref FileEntry) | 
		
	
		
			
				|  |  |  |  |     fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum ) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     # Add to allEntries list, byRelPath table, and byChecksum table | 
		
	
		
			
				|  |  |  |  |     var fileEntry: FileEntry = new(FileEntry) | 
		
	
		
			
				|  |  |  |  |     fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum) | 
		
	
		
			
				|  |  |  |  |     result.allEntries.add(fileEntry) | 
		
	
		
			
				|  |  |  |  |     result.byRelPath[fileEntry.relPath] = fileEntry | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     if not result.byChecksum.hasKey(fileEntry.relPath): | 
		
	
		
			
				|  |  |  |  |       result.byChecksum[fileEntry.checksum] = newSeq[ref FileEntry]() | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     result.byChecksum[fileEntry.checksum].add(fileEntry) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |     progress.update(count, file) | 
		
	
		
			
				|  |  |  |  |     count += 1 | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   result.indexEntries | 
		
	
		
			
				|  |  |  |  |   count += 10 | 
		
	
		
			
				|  |  |  |  |   progress.finish() | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc loadAnalysis*(path: string, analysis: var DirAnalysis) = | 
		
	
		
			
				|  |  |  |  | proc loadAnalysis*(path: string): DirAnalysis = | 
		
	
		
			
				|  |  |  |  |   ## Load a previously performed directory analysis. | 
		
	
		
			
				|  |  |  |  |   let inStream: Stream = newFileStream(path, fmRead) | 
		
	
		
			
				|  |  |  |  |   load(inStream, analysis) | 
		
	
		
			
				|  |  |  |  |   let allEntriesJson = parseJson(readFile(path)) | 
		
	
		
			
				|  |  |  |  |   result = initDirAnalysis() | 
		
	
		
			
				|  |  |  |  |   result.allEntries = toSeq(items(allEntriesJson)).map(parseFileEntry) | 
		
	
		
			
				|  |  |  |  |   result.indexEntries | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc saveAnalysis*(path: string, analysis: DirAnalysis): void = | 
		
	
		
			
				|  |  |  |  |   ## Save a completed analysis. | 
		
	
		
			
				|  |  |  |  |   let outStream = newFileStream(path, fmWrite) | 
		
	
		
			
				|  |  |  |  |   store(outStream, analysis) | 
		
	
		
			
				|  |  |  |  |   writeFile(path, $(analysis.allEntries.toJson)) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] = | 
		
	
		
			
				|  |  |  |  | proc intersection*(left, right: DirAnalysis): seq[FileEntry] = | 
		
	
		
			
				|  |  |  |  |   ## Find all ``FileEntry`` that are the same on both sides: matching contents | 
		
	
		
			
				|  |  |  |  |   ## and paths. | 
		
	
		
			
				|  |  |  |  |   return left.allEntries.filter do (item: ref FileEntry) -> bool: | 
		
	
		
			
				|  |  |  |  |   return left.allEntries.filter do (item: FileEntry) -> bool: | 
		
	
		
			
				|  |  |  |  |     if not right.byRelPath.hasKey(item.relPath): return false | 
		
	
		
			
				|  |  |  |  |     let match = right.byRelPath[item.relPath] | 
		
	
		
			
				|  |  |  |  |     if match == nil: return false | 
		
	
		
			
				|  |  |  |  |     return item.checksum == match.checksum | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc difference*(left, right: DirAnalysis): seq[ref FileEntry] = | 
		
	
		
			
				|  |  |  |  | proc difference*(left, right: DirAnalysis): seq[FileEntry] = | 
		
	
		
			
				|  |  |  |  |   ## Find all ``FileEntry`` that are present in the left but not present in | 
		
	
		
			
				|  |  |  |  |   ## the right. | 
		
	
		
			
				|  |  |  |  |   return left.allEntries.filter do (item: ref FileEntry) -> bool: | 
		
	
		
			
				|  |  |  |  |   return left.allEntries.filter do (item: FileEntry) -> bool: | 
		
	
		
			
				|  |  |  |  |     return not right.byRelPath.hasKey(item.relPath) and | 
		
	
		
			
				|  |  |  |  |            not right.byChecksum.hasKey(item.checksum) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} = | 
		
	
		
			
				|  |  |  |  | proc `*`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} = | 
		
	
		
			
				|  |  |  |  |   ## Alias for `intersection(left, right) <#intersection>`_ | 
		
	
		
			
				|  |  |  |  |   return intersection(left, right) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} = | 
		
	
		
			
				|  |  |  |  | proc `-`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} = | 
		
	
		
			
				|  |  |  |  |   ## Alias for `difference(left, right) <#difference>`_ | 
		
	
		
			
				|  |  |  |  |   return difference(left, right) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] = | 
		
	
		
			
				|  |  |  |  |   ## Find all ``FileEntry`` that have the same paths in both trees but whose | 
		
	
		
			
				|  |  |  |  |   ## contents differ. | 
		
	
		
			
				|  |  |  |  |   let matchingEntries = left.allEntries.filter do (item: ref FileEntry) -> bool: | 
		
	
		
			
				|  |  |  |  |   let matchingEntries = left.allEntries.filter do (item: FileEntry) -> bool: | 
		
	
		
			
				|  |  |  |  |     if not right.byRelPath.hasKey(item.relPath): return false | 
		
	
		
			
				|  |  |  |  |     let match = right.byRelPath[item.relPath] | 
		
	
		
			
				|  |  |  |  |     return item.checksum != match.checksum | 
		
	
		
			
				|  |  |  |  |   return matchingEntries.map(proc(item: ref FileEntry): string = return item.relPath) | 
		
	
		
			
				|  |  |  |  |   return matchingEntries.map(proc(item: FileEntry): string = return item.relPath) | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] = | 
		
	
		
			
				|  |  |  |  | proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: FileEntry]] = | 
		
	
		
			
				|  |  |  |  |   ## Find all ``FileEntry`` whose contents are the same in both trees but | 
		
	
		
			
				|  |  |  |  |   ## which are located at different paths. | 
		
	
		
			
				|  |  |  |  |   result = @[] | 
		
	
	
		
			
				
					
					|  |  |  | @@ -192,7 +215,7 @@ when isMainModule: | 
		
	
		
			
				|  |  |  |  |     if fileInfo.kind == pcDir: | 
		
	
		
			
				|  |  |  |  |       return analyzeDir(path, progressWrapper) | 
		
	
		
			
				|  |  |  |  |     elif fileInfo.kind == pcFile: | 
		
	
		
			
				|  |  |  |  |       loadAnalysis(path, result) | 
		
	
		
			
				|  |  |  |  |       result = loadAnalysis(path) | 
		
	
		
			
				|  |  |  |  |     else: | 
		
	
		
			
				|  |  |  |  |       quitWithError($path & ": is not a file or directory") | 
		
	
		
			
				|  |  |  |  |  | 
		
	
	
		
			
				
					
					|  |  |  | @@ -202,8 +225,8 @@ when isMainModule: | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if not args["<right>"]: | 
		
	
		
			
				|  |  |  |  |     rightAnalysis = (allEntries: @[], | 
		
	
		
			
				|  |  |  |  |                  byRelPath: newTable[string, ref FileEntry](), | 
		
	
		
			
				|  |  |  |  |                  byChecksum: newTable[string, seq[ref FileEntry]]()) | 
		
	
		
			
				|  |  |  |  |                  byRelPath: newTable[string, FileEntry](), | 
		
	
		
			
				|  |  |  |  |                  byChecksum: newTable[string, seq[FileEntry]]()) | 
		
	
		
			
				|  |  |  |  |   else: | 
		
	
		
			
				|  |  |  |  |     var rightPath: string = $args["<right>"] | 
		
	
		
			
				|  |  |  |  |     rightAnalysis = loadPath(rightPath) | 
		
	
	
		
			
				
					
					|  |  |  |   |