## Tree Diff
## =========
##
## Utility to compare the file contents of two directory trees.

import os, tables, streams, sequtils, strutils, docopt, marshal
import incremental_md5, console_progress

type
  ProgressWrapper* = tuple[impl: Progress, verbosity: Verbosity]
    ## Wrapper around a console_progress.Progress. ``impl`` is nil when the
    ## verbosity is ``very_quiet``.

  Verbosity* = enum ## Enum representing the level of output verbosity the
                    ## tool will emit.
    very_quiet,     ## suppress all output including the progress indicator
    quiet,          ## suppress all output except the progress indicator
    normal          ## emit all output

proc newProgressWrapper*(outFile = stdout,
                         verbosity = normal): ProgressWrapper =
  ## Create a new ProgressWrapper for the given verbosity. No underlying
  ## progress indicator is created when ``verbosity`` is ``very_quiet``.
  if verbosity > very_quiet:
    result = (impl: newProgress(outFile, 0), verbosity: verbosity)
  else:
    result = (impl: nil, verbosity: verbosity)

proc init(p: ProgressWrapper, root: string, fileCount: int): void =
  ## Announce the tree rooted at ``root`` (normal verbosity only) and set the
  ## progress indicator's maximum to ``fileCount``.
  if p.verbosity == normal:
    echo "-- ", root.expandFilename, "\L ", fileCount, " files"

  if p.verbosity > very_quiet:
    p.impl.setMax(fileCount)

proc update(p: ProgressWrapper, count: int, file: string): void =
  ## Report progress, labelled with (at most) the last 16 characters of
  ## ``file``.
  if p.verbosity > very_quiet:
    # Clamp the start index: paths shorter than 16 characters would otherwise
    # yield a negative index and raise an index defect.
    let tailStart = max(0, file.len - 16)
    p.impl.updateProgress(count, file[tailStart..file.high])

proc finish(p: ProgressWrapper): void =
  ## Erase the progress indicator and, at normal verbosity, print the total.
  if p.verbosity > very_quiet:
    p.impl.erase

  if p.verbosity == normal:
    echo " ", p.impl.getMax, " files.\L"

proc countFiles(root: string): int =
  ## Count the entries yielded by ``walkDirRec(root)``.
  for file in walkDirRec(root):
    result += 1

proc getRelPath(ancestor, child: string): string =
  ## Given an ancestor path and a child path, assuming the child path is
  ## contained within the ancestor path, return the relative path from the
  ## ancestor to the child (prefixed with "."). Returns "" when the child is
  ## not contained in the ancestor.
  let ancestorPath = ancestor.expandFilename.split({DirSep, AltSep})
  let childPath = child.expandFilename.split({DirSep, AltSep})

  # If the ancestor path is longer it cannot contain the child path and we
  # cannot construct a relative path without backtracking.
  if ancestorPath.len > childPath.len:
    return ""

  # Compare the ancestor and child path up until the end of the ancestor path.
  var idx = 0
  while idx < ancestorPath.len and ancestorPath[idx] == childPath[idx]:
    idx += 1

  # If we stopped before reaching the end of the ancestor path it must be that
  # the paths do not match. The ancestor cannot contain the child and we cannot
  # build a relative path without backtracking.
  if idx != ancestorPath.len:
    return ""

  return foldl(@["."] & childPath[idx..childPath.high], joinPath(a, b))

type
  FileEntry* = tuple[relPath: string, checksum: string]
    ## Data about one file that has been analyzed

  DirAnalysis* =     ## Analysis data about one directory tree.
    tuple[allEntries: seq[ref FileEntry],
          byRelPath: ref Table[string, ref FileEntry],
          byChecksum: ref Table[string, seq[ref FileEntry]]]

  DisplayOptions = tuple[left, right, same, content, path: bool]
    ## Consolidated description of which types of results to display.

proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Inspect a directory and analyze all files, noting their relative paths and
  ## checksum of their contents. Every file is indexed three ways: in the flat
  ## ``allEntries`` list, by relative path, and by checksum (where several
  ## files with identical contents share one bucket).
  let fileCount = countFiles(root)

  progress.init(root, fileCount)

  result = (allEntries: @[],
            byRelPath: newTable[string, ref FileEntry](),
            byChecksum: newTable[string, seq[ref FileEntry]]())

  var count = 0
  for file in walkDirRec(root):

    # Compute checksum
    let md5sum = fileToMd5(file)

    let fileEntry = new(ref FileEntry)
    fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum)

    # Add to allEntries list, byRelPath table, and byChecksum table
    result.allEntries.add(fileEntry)
    result.byRelPath[fileEntry.relPath] = fileEntry

    # The bucket must be looked up by checksum (not by relative path);
    # otherwise it is re-created for every file and earlier entries sharing
    # the same checksum are dropped.
    result.byChecksum.mgetOrPut(fileEntry.checksum,
                                newSeq[ref FileEntry]()).add(fileEntry)

    progress.update(count, file)
    count += 1

  progress.finish()

proc loadAnalysis*(path: string, analysis: var DirAnalysis) =
  ## Load a previously performed directory analysis from ``path`` into
  ## ``analysis``. Raises ``IOError`` if the file cannot be opened.
  let inStream: Stream = newFileStream(path, fmRead)
  if inStream.isNil:
    raise newException(IOError,
                       "cannot open analysis file for reading: " & path)
  defer: inStream.close()
  load(inStream, analysis)

proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Save a completed analysis to ``path``. Raises ``IOError`` if the file
  ## cannot be opened for writing.
  let outStream: Stream = newFileStream(path, fmWrite)
  if outStream.isNil:
    raise newException(IOError,
                       "cannot open analysis file for writing: " & path)
  defer: outStream.close()
  store(outStream, analysis)

proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Find all ``FileEntry`` that are the same on both sides: matching contents
  ## and paths. The returned entries are those of ``left``.
  return left.allEntries.filterIt(
    right.byRelPath.hasKey(it.relPath) and
    right.byRelPath[it.relPath].checksum == it.checksum)

proc difference*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Find all ``FileEntry`` that are present in the left but not present in
  ## the right, i.e. neither their path nor their checksum occurs in ``right``.
  return left.allEntries.filterIt(
    not right.byRelPath.hasKey(it.relPath) and
    not right.byChecksum.hasKey(it.checksum))

proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Alias for `intersection(left, right) <#intersection>`_
  return intersection(left, right)

proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Alias for `difference(left, right) <#difference>`_
  return difference(left, right)

proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Find all ``FileEntry`` that have the same paths in both trees but whose
  ## contents differ. Returns the relative paths of those entries.
  let mismatched = left.allEntries.filterIt(
    right.byRelPath.hasKey(it.relPath) and
    right.byRelPath[it.relPath].checksum != it.checksum)

  return mismatched.mapIt(it.relPath)

proc sameContentsDifferentPaths*(left, right: DirAnalysis):
    seq[tuple[left, right: ref FileEntry]] =
  ## Find all ``FileEntry`` whose contents are the same in both trees but
  ## which are located at different paths. One pair is produced for every
  ## (left entry, right entry) combination sharing a checksum.
  result = @[]

  for item in left.allEntries:
    if not right.byChecksum.hasKey(item.checksum): continue

    for match in right.byChecksum[item.checksum]:
      if item.relPath != match.relPath:
        result.add((left: item, right: match))

when isMainModule:

  let quitWithError = proc (error: string): void =
    ## Print ``error`` to stderr and exit with a failure status.
    stderr.writeLine("treediff: " & error)
    quit(QuitFailure)

  # NOTE(review): the <left>/<right> placeholders and the argument names of
  # --save-left/--save-right were missing from this text; they have been
  # reconstructed so the usage pattern matches the lookups below. Confirm the
  # argument names against the released help text.
  let doc = """
Usage:
  treediff <left> [<right>] [options]
  treediff (-h | --help)
  treediff (-V | --version)

<left> and <right> represent paths to directory roots to be compared. If one
of these paths points to a file instead of a directory, treediff assumes that
the file represents a saved directory analysis to be loaded in place of a
directory to compare. For example:

    treediff /path/to/dir /path/to/output.json

will analyze the directory tree at '/path/to/dir' to create the left-side
analysis and load a pre-existing analysis from '/path/to/output.json' as the
right-side analysis.

Options:
  -h --help                     Show this usage information.
  -V --version                  Show the program version.
  -v --verbose                  Enable verbose output.
  -q --quiet                    Suppress all output and error messages except
                                for the progress indicator.
  -Q --very-quiet               Suppress all output and error messages
                                includeing the progress indicator.
  -1 --save-left <left_out>     Save the left analysis to <left_out> (will be
                                formatted as JSON)
  -2 --save-right <right_out>   Save the right analysis to <right_out> (will
                                be formatted as JSON)
  -s --same
  -S --exclude-same
                                Show or hide information about files which are
                                the same in both trees.
  -c --content-mismatch
  -C --exclude-content-mismatch
                                Show or hide information about files whose
                                relative paths are the same in both trees but
                                whose contents differ.
  -p --path-mismatch
  -P --exclude-path-mismatch
                                Show or hide information about files whose
                                contents are the same in both trees but whose
                                relative paths differ.
  -l --left-only
  -L --exclude-left-only
                                Show or hide information about files which are
                                found only in the left tree.
  -r --right-only
  -R --exclude-right-only
                                Show or hide information about files which are
                                found only in the right tree.
"""

  let args = docopt(doc, version = "treediff v1.4.3")

  var verbosity = normal
  if args["--quiet"]: verbosity = quiet
  if args["--very-quiet"]: verbosity = very_quiet

  let progressWrapper = newProgressWrapper(verbosity = verbosity)

  # Load or perform analysis
  if not args["<left>"]:
    quitWithError("Missing <left> parameter.")

  let leftPath: string = $args["<left>"]

  let loadPath = proc (path: string): DirAnalysis =
    ## Analyze ``path`` when it is a directory, or load a saved analysis when
    ## it is a regular file; exit with an error otherwise.
    if not path.fileExists and not path.dirExists:
      quitWithError($path & ": no such file or directory.")

    let fileInfo = path.getFileInfo

    if fileInfo.kind == pcDir:
      return analyzeDir(path, progressWrapper)
    elif fileInfo.kind == pcFile:
      loadAnalysis(path, result)
    else:
      quitWithError($path & ": is not a file or directory")

  var leftAnalysis, rightAnalysis: DirAnalysis

  leftAnalysis = loadPath(leftPath)

  if not args["<right>"]:
    # No right-hand tree given: compare against an empty analysis.
    rightAnalysis = (allEntries: @[],
                     byRelPath: newTable[string, ref FileEntry](),
                     byChecksum: newTable[string, seq[ref FileEntry]]())
  else:
    let rightPath: string = $args["<right>"]
    rightAnalysis = loadPath(rightPath)

  # Check for output options
  if args["--save-left"]:
    saveAnalysis($args["--save-left"], leftAnalysis)

  if args["--save-right"] and rightAnalysis.allEntries.len > 0:
    saveAnalysis($args["--save-right"], rightAnalysis)

  # Parse filter options
  var displayOptions: DisplayOptions = (
    left: false, right: false, same: false, content: false, path: false)

  # If none of the explicit selectors are given, assume all are expected.
  if not (args["--left-only"] or args["--right-only"] or
          args["--same"] or args["--content-mismatch"] or
          args["--path-mismatch"]):
    displayOptions = (left: true, right: true, same: true,
                      content: true, path: true)

  # An explicit selector enables a category; the matching --exclude-* flag is
  # applied afterwards and therefore wins when both are given.
  if args["--same"]: displayOptions.same = true
  if args["--exclude-same"]: displayOptions.same = false

  if args["--content-mismatch"]: displayOptions.content = true
  if args["--exclude-content-mismatch"]: displayOptions.content = false

  if args["--path-mismatch"]: displayOptions.path = true
  if args["--exclude-path-mismatch"]: displayOptions.path = false

  if args["--left-only"]: displayOptions.left = true
  if args["--exclude-left-only"]: displayOptions.left = false

  if args["--right-only"]: displayOptions.right = true
  if args["--exclude-right-only"]: displayOptions.right = false

  # Display output results
  if verbosity == normal:
    if displayOptions.same:
      let sameEntries = leftAnalysis * rightAnalysis
      for fe in sameEntries: echo "same: ", fe.relPath

    if displayOptions.content:
      let contentsDiffer = samePathDifferentContents(leftAnalysis,
                                                     rightAnalysis)
      for path in contentsDiffer: echo "contents differ: ", path

    if displayOptions.path:
      let pathsDiffer = sameContentsDifferentPaths(leftAnalysis,
                                                   rightAnalysis)
      for pair in pathsDiffer:
        echo "paths differ: ", pair.left.relPath, " ", pair.right.relPath

    if displayOptions.left:
      let leftOnly = leftAnalysis - rightAnalysis
      for fe in leftOnly: echo "left only: ", fe.relPath

    if displayOptions.right:
      let rightOnly = rightAnalysis - leftAnalysis
      for fe in rightOnly: echo "right only: ", fe.relPath