|
|
|
@ -1,54 +1,33 @@
|
|
|
|
|
## Tree Diff
|
|
|
|
|
## =========
|
|
|
|
|
##
|
|
|
|
|
## Utility to compare the file contents of two directory trees.
|
|
|
|
|
|
|
|
|
|
import std/[json, jsonutils, os, tables, sequtils, strutils]
|
|
|
|
|
import docopt
|
|
|
|
|
import os, tables, streams, sequtils, strutils, docopt, marshal
|
|
|
|
|
import incremental_md5, console_progress
|
|
|
|
|
|
|
|
|
|
import ./cliconstants
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
type
  ProgressWrapper* = tuple[impl: Progress, verbosity: Verbosity]
    ## Wrapper around a console_progress.Progress.

  Verbosity* = enum ## Enum representing the level of output verbosity the tool will emit.
    very_quiet, ## suppress all output including the progress indicator
    quiet,      ## suppress all output except the progress indicator
    normal      ## emit all output
|
|
|
|
|
|
|
|
|
|
proc newProgressWrapper*(outFile = stdout, verbosity = normal): ProgressWrapper =
  ## Create a new ProgressWrapper for the given verbosity.
  ##
  ## A progress indicator writing to ``outFile`` is only created when the
  ## verbosity is above ``very_quiet``; otherwise ``impl`` stays ``nil``.
  result.verbosity = verbosity
  result.impl = nil
  if verbosity > very_quiet:
    result.impl = newProgress(0, outFile)
|
|
|
|
|
DisplayOptions = tuple[left, right, same, content, path: bool]
|
|
|
|
|
|
|
|
|
|
proc init(p: ProgressWrapper, root: string, fileCount: int): void =
  ## Announce the tree rooted at ``root`` and size the progress indicator.
  ##
  ## The header (expanded root path and file count) is printed once, and only
  ## at ``normal`` verbosity. The indicator's maximum is set whenever an
  ## indicator exists (verbosity above ``very_quiet``).
  if p.verbosity == normal:
    echo "-- ", root.expandFilename, "\L ", fileCount, " files"
  if p.verbosity > very_quiet: p.impl.setMax(fileCount)
|
|
|
|
|
|
|
|
|
|
proc update(p: ProgressWrapper, count: int, file: string): void =
  ## Advance the progress indicator to ``count`` and show the tail of ``file``.
  ##
  ## At most the last 16 characters of ``file`` are displayed. The start index
  ## is clamped to 0 so names shorter than 16 characters do not raise an index
  ## error.
  if p.verbosity > very_quiet:
    let tailStart = max(file.high - 15, 0)
    p.impl.updateProgress(count, file[tailStart..file.high])
|
|
|
|
|
|
|
|
|
|
proc finish(p: ProgressWrapper): void =
  ## Erase the progress indicator and, at ``normal`` verbosity, print the
  ## indicator's maximum as the file total.
  ##
  ## The summary line is guarded by ``normal`` so that ``quiet`` mode emits
  ## nothing but the progress indicator itself.
  if p.verbosity > very_quiet:
    p.impl.erase
  if p.verbosity == normal: echo " ", p.impl.getMax, " files.\L"
|
|
|
|
|
|
|
|
|
|
proc countFiles(root: string): int =
  ## Count the entries yielded by a recursive walk of ``root``.
  ##
  ## Each entry is counted exactly once; the walk uses ``walkDirRec``'s
  ## default filters.
  for _ in walkDirRec(root):
    inc result
|
|
|
|
|
|
|
|
|
|
proc getRelPath(ancestor, child: string): string =
|
|
|
|
|
## Given a ancestor path and a child path, assuming the child path is
|
|
|
|
|
## contained within the ancestor path, return the relative path from the
|
|
|
|
|
## ancestor to the child.
|
|
|
|
|
|
|
|
|
|
let ancestorPath = ancestor.expandFilename.split({DirSep, AltSep})
|
|
|
|
|
let childPath = child.expandFilename.split({DirSep, AltSep})
|
|
|
|
|
|
|
|
|
@ -66,140 +45,157 @@ proc getRelPath(ancestor, child: string): string =
|
|
|
|
|
if idx != ancestorPath.len: return ""
|
|
|
|
|
return foldl(@["."] & childPath[idx..childPath.high], joinPath(a, b))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
type
  FileEntry* = ref tuple[relPath: string, checksum: string]
    ## Data about one file that has been analyzed: its path relative to the
    ## analyzed root and the checksum of its contents.

  DirAnalysis* = ## Analysis data about one directory tree.
    tuple[
      allEntries: seq[FileEntry],
      byRelPath: TableRef[string, FileEntry],
      byChecksum: TableRef[string, seq[FileEntry]]]

  DisplayOptions = tuple[left, right, same, content, path: bool]
    ## Consolidated description of which types of results to display.
|
|
|
|
|
|
|
|
|
|
func `$`(f: FileEntry): string =
  ## Render an entry as its checksum, a colon and space, then its relative path.
  result = f.checksum
  result.add ": "
  result.add f.relPath
|
|
|
|
|
|
|
|
|
|
proc getOrFail(n: JsonNode, key: string, objName: string = ""): JsonNode =
  ## Convenience accessor: return ``n[key]`` from a JObject, or raise when the
  ## key is absent.
  ##
  ## ``objName`` is prefixed to the error message to identify the object being
  ## read. Raises ``KeyError``, a catchable subtype of ``Exception``, so
  ## existing ``except Exception`` handlers still work.
  if not n.hasKey(key):
    raise newException(KeyError, objName & " missing key '" & key & "'")
  n[key]
|
|
|
|
|
|
|
|
|
|
proc getIfExists(n: JsonNode, key: string): JsonNode =
  ## Convenience accessor: return ``n[key]`` from a JObject, or a JSON null
  ## node when the key is absent.
  if not n.hasKey(key):
    return newJNull()
  return n[key]
|
|
|
|
|
|
|
|
|
|
func parseFileEntry(n: JsonNode): FileEntry =
  ## Build a FileEntry from a JSON object carrying the "relPath" and
  ## "checksum" keys. A missing key is reported by ``getOrFail``.
  let
    relPath = n.getOrFail("relPath").getStr
    checksum = n.getOrFail("checksum").getStr
  new(result)
  result[] = (relPath: relPath, checksum: checksum)
|
|
|
|
|
|
|
|
|
|
func initDirAnalysis(): DirAnalysis =
  ## Create an empty analysis: no entries and two freshly allocated, empty
  ## lookup tables.
  result.allEntries = newSeq[FileEntry]()
  result.byRelPath = newTable[string, FileEntry]()
  result.byChecksum = newTable[string, seq[FileEntry]]()
|
|
|
|
|
|
|
|
|
|
func indexEntries(da: var DirAnalysis) =
  ## Populate the ``byRelPath`` and ``byChecksum`` lookup tables from
  ## ``da.allEntries``.
  ##
  ## A later entry with the same relative path replaces the earlier one in
  ## ``byRelPath``; entries sharing a checksum accumulate in one bucket.
  for entry in da.allEntries:
    da.byRelPath[entry.relPath] = entry
    # Create the bucket on first sight of this checksum, then append.
    da.byChecksum.mgetOrPut(entry.checksum, newSeq[FileEntry]()).add(entry)
|
|
|
|
|
proc newProgressWrapper*(verbosity: Verbosity): ProgressWrapper =
  ## Create a ProgressWrapper writing to stdout for the given verbosity.
  ##
  ## No indicator is created (``impl`` is ``nil``) at ``very_quiet``.
  result.verbosity = verbosity
  result.impl = nil
  if verbosity > very_quiet:
    result.impl = newProgress(stdout, 0)
|
|
|
|
|
|
|
|
|
|
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Inspect a directory and analyze all files, noting their relative paths and
  ## checksum of their contents.
  ##
  ## Progress is reported through ``progress``. The lookup tables are built
  ## once, after the walk, by ``indexEntries``, so every entry is indexed
  ## exactly once and checksum buckets are keyed by checksum.
  let fileCount = countFiles(root)

  # The indicator maximum is fileCount + 10, as in the source; the reason for
  # the extra 10 is not visible here (presumably headroom for indexing).
  progress.init(root, fileCount + 10)

  result = initDirAnalysis()

  var count = 0
  for file in walkDirRec(root):
    # Compute checksum
    let md5sum = fileToMd5(file)

    var fileEntry: FileEntry = new(FileEntry)
    fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum)
    result.allEntries.add(fileEntry)

    progress.update(count, file)
    count += 1

  result.indexEntries
  progress.finish()
|
|
|
|
|
|
|
|
|
|
proc loadAnalysis*(path: string): DirAnalysis =
  ## Load a previously performed directory analysis.
  ##
  ## The file at ``path`` is parsed as JSON and iterated as a list of entry
  ## objects; the lookup tables are then rebuilt from the loaded entries.
  result = initDirAnalysis()
  let entriesJson = parseJson(readFile(path))
  var loaded: seq[FileEntry] = @[]
  for node in items(entriesJson):
    loaded.add(parseFileEntry(node))
  result.allEntries = loaded
  result.indexEntries
|
|
|
|
|
proc loadAnalysis*(path: string, analysis: var DirAnalysis) =
  ## Load a marshalled analysis from the file at ``path`` into ``analysis``.
  ##
  ## Raises ``IOError`` when the file cannot be opened. The stream is closed
  ## on every exit path.
  let inStream: Stream = newFileStream(path, fmRead)
  # newFileStream signals failure by returning nil rather than raising.
  if inStream.isNil:
    raise newException(IOError, "unable to open analysis file: " & path)
  defer: inStream.close()
  load(inStream, analysis)
|
|
|
|
|
|
|
|
|
|
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Save a completed analysis.
  ##
  ## Only ``allEntries`` is serialized, as JSON, and the file is written once.
  ## This is the shape the JSON-based ``loadAnalysis(path)`` reads back; the
  ## lookup tables are rebuilt on load.
  let serialized = $(analysis.allEntries.toJson)
  writeFile(path, serialized)
|
|
|
|
|
|
|
|
|
|
proc intersection*(left, right: DirAnalysis): seq[FileEntry] =
  ## Find all ``FileEntry`` that are the same on both sides: matching contents
  ## and paths.
  ##
  ## Returns the entries from ``left``, in their original order.
  for item in left.allEntries:
    # getOrDefault yields nil both for a missing path and for a nil entry;
    # either way there is nothing to match against.
    let match = right.byRelPath.getOrDefault(item.relPath)
    if match != nil and item.checksum == match.checksum:
      result.add(item)
|
|
|
|
|
|
|
|
|
|
proc difference*(left, right: DirAnalysis): seq[FileEntry] =
  ## Find all ``FileEntry`` that are present in the left but not present in
  ## the right.
  ##
  ## An entry counts as absent from the right only when neither its relative
  ## path nor its checksum is known there.
  for item in left.allEntries:
    if item.relPath notin right.byRelPath and
       item.checksum notin right.byChecksum:
      result.add(item)
|
|
|
|
|
|
|
|
|
|
proc `*`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
  ## Alias for `intersection(left, right) <#intersection>`_
  intersection(left, right)
|
|
|
|
|
|
|
|
|
|
proc `-`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
  ## Alias for `difference(left, right) <#difference>`_
  difference(left, right)
|
|
|
|
|
|
|
|
|
|
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Find all ``FileEntry`` that have the same paths in both trees but whose
  ## contents differ.
  ##
  ## Returns the relative paths of those entries, in ``left`` order.
  for item in left.allEntries:
    let match = right.byRelPath.getOrDefault(item.relPath)
    # A nil match means the path is unknown on the right (or has no entry),
    # so it is skipped instead of being dereferenced.
    if match != nil and item.checksum != match.checksum:
      result.add(item.relPath)
|
|
|
|
|
|
|
|
|
|
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: FileEntry]] =
  ## Find all ``FileEntry`` whose contents are the same in both trees but
  ## which are located at different paths.
  ##
  ## Each result pairs a left entry with one right entry that shares its
  ## checksum but has a different relative path; a left entry may appear in
  ## several pairs.
  result = @[]
  for item in left.allEntries:
    if item.checksum notin right.byChecksum: continue
    for match in right.byChecksum[item.checksum]:
      if item.relPath != match.relPath:
        result.add((left: item, right: match))
|
|
|
|
|
|
|
|
|
|
when isMainModule:
|
|
|
|
|
|
|
|
|
|
let quitWithError = proc (error: string): void =
|
|
|
|
|
proc quitWithError(error: string): void =
|
|
|
|
|
stderr.writeLine("treediff: " & error)
|
|
|
|
|
quit(QuitFailure)
|
|
|
|
|
|
|
|
|
|
let args = docopt(USAGE, version = "treediff " & VERSION)
|
|
|
|
|
when isMainModule:
|
|
|
|
|
|
|
|
|
|
let doc = """
|
|
|
|
|
Usage:
|
|
|
|
|
treediff <left> [<right>] [options]
|
|
|
|
|
treediff (-h | --help)
|
|
|
|
|
treediff (-V | --version)
|
|
|
|
|
|
|
|
|
|
<left> and <right> represent paths to directory roots to be compared. If one
|
|
|
|
|
of these paths points to a file instead of a directory, treediff assumes that
|
|
|
|
|
the file represents a saved directory analysis to be loaded in place of a
|
|
|
|
|
directory to compare. For example:
|
|
|
|
|
|
|
|
|
|
treediff /path/to/dir /path/to/output.json
|
|
|
|
|
|
|
|
|
|
will analyze the directory tree at '/path/to/dir' to create the left-side
|
|
|
|
|
analysis and load a pre-existing analysis from '/path/to/output.json' as the
|
|
|
|
|
right-side analysis.
|
|
|
|
|
|
|
|
|
|
Options:
|
|
|
|
|
-h --help Show this usage information.
|
|
|
|
|
-V --version Show the program version.
|
|
|
|
|
-v --verbose Enable verbose output.
|
|
|
|
|
-q --quiet Suppress all output and error messages except for the
|
|
|
|
|
progress indicator.
|
|
|
|
|
-Q --very-quiet Suppress all output and error messages includeing the
|
|
|
|
|
progress indicator.
|
|
|
|
|
|
|
|
|
|
-1 --save-left <left_out> Save the left analysis to <left_out> (will be
|
|
|
|
|
formatted as JSON)
|
|
|
|
|
-2 --save-right <right_out> Save the right analysis to <right_out> (will be
|
|
|
|
|
formatted as JSON)
|
|
|
|
|
|
|
|
|
|
-s --same
|
|
|
|
|
-S --exclude-same
|
|
|
|
|
|
|
|
|
|
Show or hide information about files which are the same in both trees.
|
|
|
|
|
|
|
|
|
|
-c --content-mismatch
|
|
|
|
|
-C --exclude-content-mismatch
|
|
|
|
|
|
|
|
|
|
Show or hide information about files whose relative paths are the same
|
|
|
|
|
in both trees but whose contents differ.
|
|
|
|
|
|
|
|
|
|
-p --path-mismatch
|
|
|
|
|
-P --exclude-path-mismatch
|
|
|
|
|
|
|
|
|
|
Show or hide information about files whose contents are the same in both
|
|
|
|
|
trees but whose relative paths differ.
|
|
|
|
|
|
|
|
|
|
-l --left-only
|
|
|
|
|
-L --exclude-left-only
|
|
|
|
|
|
|
|
|
|
Show or hide information about files which are found only in the left
|
|
|
|
|
tree.
|
|
|
|
|
|
|
|
|
|
-r --right-only
|
|
|
|
|
-R --exclude-right-only
|
|
|
|
|
|
|
|
|
|
Show or hide information about files which are found only in the right
|
|
|
|
|
tree.
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
let args = docopt(doc, version = "treediff v1.4.0")
|
|
|
|
|
|
|
|
|
|
var verbosity = normal
|
|
|
|
|
if args["--quiet"]: verbosity = quiet
|
|
|
|
|
if args["--very-quiet"]: verbosity = very_quiet
|
|
|
|
|
let progressWrapper = newProgressWrapper(verbosity = verbosity)
|
|
|
|
|
let progressWrapper = newProgressWrapper(verbosity)
|
|
|
|
|
|
|
|
|
|
# Load or perform analysis
|
|
|
|
|
if not args["<left>"]:
|
|
|
|
@ -215,7 +211,7 @@ when isMainModule:
|
|
|
|
|
if fileInfo.kind == pcDir:
|
|
|
|
|
return analyzeDir(path, progressWrapper)
|
|
|
|
|
elif fileInfo.kind == pcFile:
|
|
|
|
|
result = loadAnalysis(path)
|
|
|
|
|
loadAnalysis(path, result)
|
|
|
|
|
else:
|
|
|
|
|
quitWithError($path & ": is not a file or directory")
|
|
|
|
|
|
|
|
|
@ -225,8 +221,8 @@ when isMainModule:
|
|
|
|
|
|
|
|
|
|
if not args["<right>"]:
|
|
|
|
|
rightAnalysis = (allEntries: @[],
|
|
|
|
|
byRelPath: newTable[string, FileEntry](),
|
|
|
|
|
byChecksum: newTable[string, seq[FileEntry]]())
|
|
|
|
|
byRelPath: newTable[string, ref FileEntry](),
|
|
|
|
|
byChecksum: newTable[string, seq[ref FileEntry]]())
|
|
|
|
|
else:
|
|
|
|
|
var rightPath: string = $args["<right>"]
|
|
|
|
|
rightAnalysis = loadPath(rightPath)
|
|
|
|
@ -282,3 +278,4 @@ when isMainModule:
|
|
|
|
|
if displayOptions.right:
|
|
|
|
|
let rightOnly = rightAnalysis - leftAnalysis
|
|
|
|
|
for fe in rightOnly: echo "right only: ", fe.relPath
|
|
|
|
|
|
|
|
|
|