6 Commits
v1.2 ... 1.4.1

8 changed files with 443 additions and 53 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
nimcache/
build/
.gradle/
/treediff

View File

@ -3,7 +3,7 @@ apply plugin: "application"
apply plugin: "maven"
group = "com.jdblabs"
version = "1.2"
version = "1.4.1"
mainClassName = "com.jdblabs.file.treediff.TreeDiff"
repositories {

View File

@ -8,7 +8,7 @@ import org.apache.commons.codec.digest.DigestUtils
public class TreeDiff {
public static final String VERSION = "1.2"
public static final String VERSION = "1.4.1"
private ObjectMapper objectMapper = new ObjectMapper()
private PrintStream stdout
@ -54,7 +54,7 @@ public class TreeDiff {
def opts = LightOptionParser.parseOptions(cliDef, args)
if (opts.h) { /* TODO */ return }
if (opts.h) { println this.usage; return }
if (opts.V) {
stdout.println "JDB Labs TreeDiff v${VERSION}"
@ -280,4 +280,103 @@ public class TreeDiff {
private void verboseErr(String msg) { if (verbose) stderr.println msg }
/**
 * Build the command-line help text for the utility.
 *
 * @return the usage message, headed by the current {@code VERSION}.
 */
// NOTE(review): "-rd, --direction" below looks like a typo for a directory
// option, but the CLI definition is not visible here, so it is left as-is.
public String getUsage() {
    return """\
JDB Labs TreeDiff v${VERSION}

Gather and display information about the differences between two file trees,
including files found in only one side and not the other, files that match on
both sides, files which share the same contents but reside in differing paths
on each side, and files that reside at the same location on both sides but
whose contents differ.

usage: treediff [options] <left-directory> <right-directory>

where options are:

    -h, --help      Output this usage information.

    -v, --verbose   Enable verbose output.

    -V, --version   Output the version information for the utility.

    -g, --gui       Launch the graphical interface (not yet implemented).

    -s, --same
        Output information about files that are the same on both sides.

    -S, --exclude-same
        Do not output information about files that are the same on both sides.

    -c, --content-mismatch
        Output information about files that have the same relative path on both
        sides but whose contents differ.

    -C, --exclude-content-mismatch
        Do not output information about files that have the same relative path
        on both sides but whose contents differ.

    -p, --path-mismatch
        Output information about files that have the same content but reside at
        different relative paths on each side.

    -P, --exclude-path-mismatch
        Do not output information about files that have the same content but
        reside at different relative paths on each side.

    -l, --left-only
        Output information about files found on only the left side (missing
        from the right entirely).

    -L, --exclude-left-only
        Do not output information about files found on the left side only
        (missing from the right entirely).

    -r, --right-only
        Output information about files found on only the right side (missing
        from the left entirely).

    -R, --exclude-right-only
        Do not output information about files found on the right side only
        (missing from the left entirely).

    -q, --quiet
        Suppress all output and error messages except for the progress
        indicator.

    -Q, --very-quiet
        Suppress all output and error messages including the progress
        indicator.

    -rd, --direction <directory-path>
        Use <directory-path> as the root for all relative file paths (input
        directories to scan for example).

    -i, --analysis-in <left-dir-analysis> <right-dir-analysis>
        Use pre-calculated directory analysis in place of reading local
        directories. This is useful if you wish to do diffs between two
        directory trees that are not on the same filesystem, or if you wish to
        display different output about a diff without re-scanning the
        filesystem.

    -o, --analysis-out <file-name-root>
        In addition to the requested output on STDOUT, write the analysis for
        each of the scanned directories to files named <file-name-root>.left
        and <file-name-root>.right. These analysis files are formatted so that
        they can be used as inputs to the --analysis-in option.
"""
}
}

View File

@ -0,0 +1,44 @@
import md5
import os
proc fileToMD5*(filename: string) : string =
  ## Computes the MD5 digest of the file at `filename`, reading it in
  ## fixed-size blocks so the whole file is never held in memory at once.
  ##
  ## Returns the string form of the digest. If an `IOError` is raised while
  ## opening or reading the file, the problem is reported on stderr and the
  ## string form of the (never finalized) digest is returned; callers must
  ## not treat that value as a valid checksum.
  const blockSize: int = 8192
  var
    c: MD5Context
    d: MD5Digest
    f: File
    buffer: array[blockSize, char]

  try:
    f = open(filename)
    md5Init(c)

    # Feed the hash one block at a time until a read returns no bytes.
    var bytesRead = f.readBuffer(buffer.addr, blockSize)
    while bytesRead > 0:
      # The buffer is not NUL-terminated; the explicit length bounds the read.
      md5Update(c, cast[cstring](buffer.addr), bytesRead)
      bytesRead = f.readBuffer(buffer.addr, blockSize)

    md5Final(c, d)

  except IOError:
    # Name the file and the real cause, and keep diagnostics off stdout.
    stderr.writeLine("Unable to read '" & filename & "': " &
                     getCurrentExceptionMsg())

  finally:
    # `f` is still nil when `open` itself failed.
    if f != nil:
      close(f)

  result = $d
when isMainModule:
  # Command-line entry point: print the MD5 of the file named by the first
  # argument, or complain and exit with status -1 when no argument is given.
  if paramCount() <= 0:
    echo("Must pass filename.")
    quit(-1)

  let arguments = commandLineParams()
  echo("MD5: ", fileToMD5(arguments[0]))

281
src/main/nim/treediff.nim Normal file
View File

@ -0,0 +1,281 @@
import os, tables, streams, sequtils, strutils, docopt, marshal
import incremental_md5, console_progress
type
  # Output levels, ordered from least to most talkative; the ordering is
  # relied on by comparisons such as `verbosity > very_quiet`.
  Verbosity* = enum
    very_quiet, quiet, normal

  # A single file: its path relative to the analysed root and its checksum.
  FileEntry* = tuple
    relPath: string
    checksum: string

  # The result of analysing one tree: every entry, plus lookup tables keyed
  # by relative path and by checksum (several files may share a checksum).
  DirAnalysis* = tuple
    allEntries: seq[ref FileEntry]
    byRelPath: ref Table[string, ref FileEntry]
    byChecksum: ref Table[string, seq[ref FileEntry]]

  # A progress indicator together with the verbosity that governs its use.
  ProgressWrapper* = tuple
    impl: Progress
    verbosity: Verbosity

  # Which result categories should be printed.
  DisplayOptions = tuple
    left, right, same, content, path: bool
proc init(p: ProgressWrapper, root: string, fileCount: int): void =
  ## Prints the expanded root path (normal verbosity only) and tells the
  ## progress indicator how many files to expect (any level above very_quiet).
  case p.verbosity
  of normal:
    echo "-- ", root.expandFilename
    p.impl.setMax(fileCount)
  of quiet:
    p.impl.setMax(fileCount)
  of very_quiet:
    discard
proc update(p: ProgressWrapper, count: int, file: string): void =
  ## Advances the progress indicator to `count`, labelling it with the last
  ## 16 characters of `file` (or all of `file` when it is shorter than that).
  ## Does nothing at very_quiet verbosity.
  if p.verbosity > very_quiet:
    # Clamp the start index: short paths would otherwise yield a negative
    # slice start and fail the bounds check.
    let first = max(0, file.len - 16)
    p.impl.updateProgress(count, file[first..file.high])
proc finish(p: ProgressWrapper): void =
  ## Erases the progress indicator and, at normal verbosity, prints the file
  ## total. Does nothing at very_quiet verbosity.
  if p.verbosity == very_quiet: return

  p.impl.erase()
  if p.verbosity == normal:
    echo " ", p.impl.getMax, " files.\L"
proc countFiles(root: string): int =
  ## Returns how many entries `walkDirRec` yields beneath `root`.
  result = 0
  for entry in walkDirRec(root):
    inc(result)
proc getRelPath(ancestor, child: string): string =
  ## Returns `child` expressed relative to `ancestor`, built by joining "."
  ## with the path components of `child` that follow those of `ancestor`.
  ## Returns "" when `ancestor` is not a prefix of `child`. Both paths are
  ## passed through `expandFilename` first.
  let separators = {DirSep, AltSep}
  let rootParts = ancestor.expandFilename.split(separators)
  let fileParts = child.expandFilename.split(separators)

  # An ancestor with more components than the child cannot contain it.
  if rootParts.len > fileParts.len: return ""

  # Every ancestor component must match the child's component at the same
  # position; otherwise a relative path would need backtracking.
  for i in 0 .. rootParts.high:
    if rootParts[i] != fileParts[i]: return ""

  # Join whatever remains of the child path onto ".".
  result = "."
  for i in rootParts.len .. fileParts.high:
    result = joinPath(result, fileParts[i])
proc newProgressWrapper*(verbosity: Verbosity): ProgressWrapper =
  ## Builds a wrapper for the given verbosity. A progress indicator writing
  ## to stdout is created only above very_quiet; otherwise `impl` stays nil.
  result.verbosity = verbosity
  if verbosity > very_quiet:
    result.impl = newProgress(stdout, 0)
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Scans every file under `root`, computing its MD5 checksum and relative
  ## path, and returns the entries together with lookup tables keyed by
  ## relative path and by checksum. The tree is walked twice: once to count
  ## files for the progress indicator and once to analyse them.
  let fileCount = countFiles(root)

  progress.init(root, fileCount)

  result = (allEntries: @[],
            byRelPath: newTable[string, ref FileEntry](),
            byChecksum: newTable[string, seq[ref FileEntry]]())

  var count = 0
  for file in walkDirRec(root):

    # Compute checksum
    let md5sum = fileToMd5(file)

    var fileEntry: ref FileEntry = new(ref FileEntry)
    fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum)

    # Add to allEntries list
    result.allEntries.add(fileEntry)

    # Add to byRelPath table
    result.byRelPath[fileEntry.relPath] = fileEntry

    # Add to the byChecksum table. The table is keyed by checksum, so the
    # existence check must use the checksum too; checking the relative path
    # would replace the bucket and drop earlier files with the same contents.
    if not result.byChecksum.hasKey(fileEntry.checksum):
      result.byChecksum[fileEntry.checksum] = newSeq[ref FileEntry]()

    result.byChecksum[fileEntry.checksum].add(fileEntry)

    progress.update(count, file)
    count += 1

  progress.finish()
proc loadAnalysis*(path: string, analysis: var DirAnalysis) =
  ## Reads a previously saved analysis from the file at `path` into
  ## `analysis`. Raises `IOError` if the file cannot be opened.
  let inStream: Stream = newFileStream(path, fmRead)

  # newFileStream signals failure by returning nil rather than raising.
  if inStream.isNil:
    raise newException(IOError, "unable to open analysis file: " & path)

  try:
    load(inStream, analysis)
  finally:
    # Release the file handle even when unmarshalling fails.
    inStream.close()
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Writes `analysis` to the file at `path` in the format read back by
  ## `marshal.load`. Raises `IOError` if the file cannot be opened.
  let outStream = newFileStream(path, fmWrite)

  # newFileStream signals failure by returning nil rather than raising.
  if outStream.isNil:
    raise newException(IOError, "unable to write analysis file: " & path)

  try:
    store(outStream, analysis)
  finally:
    # Closing flushes buffered output and releases the file handle.
    outStream.close()
proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Returns the entries of `left` for which `right` has an entry at the same
  ## relative path with the same checksum, in `left`'s order.
  result = @[]
  for item in left.allEntries:
    if right.byRelPath.hasKey(item.relPath) and
        right.byRelPath[item.relPath].checksum == item.checksum:
      result.add(item)
proc difference*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Returns the entries of `left` whose relative path and whose checksum are
  ## both absent from `right`, in `left`'s order.
  result = @[]
  for item in left.allEntries:
    if right.byRelPath.hasKey(item.relPath): continue
    if right.byChecksum.hasKey(item.checksum): continue
    result.add(item)
proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator form of `intersection`.
  result = intersection(left, right)
proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator form of `difference`.
  result = difference(left, right)
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Returns the relative paths present on both sides whose checksums differ,
  ## in `left`'s order.
  result = @[]
  for item in left.allEntries:
    if not right.byRelPath.hasKey(item.relPath): continue
    if right.byRelPath[item.relPath].checksum != item.checksum:
      result.add(item.relPath)
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] =
  ## Pairs each entry of `left` with every entry of `right` that has the same
  ## checksum but a different relative path.
  result = @[]
  for leftEntry in left.allEntries:
    if right.byChecksum.hasKey(leftEntry.checksum):
      for rightEntry in right.byChecksum[leftEntry.checksum]:
        if leftEntry.relPath == rightEntry.relPath: continue
        result.add((left: leftEntry, right: rightEntry))
proc quitWithError(error: string): void =
  ## Writes `error` to stderr, prefixed with the program name, then exits
  ## with `QuitFailure`.
  let message = "treediff: " & error
  stderr.writeLine(message)
  quit(QuitFailure)
when isMainModule:
  # Command-line entry point: parse options, obtain the left and right
  # analyses (by scanning a directory or loading a saved analysis file),
  # optionally save them, then print the selected comparison categories.
  #
  # Layout of the help text matters to docopt: usage patterns are indented
  # under "Usage:", and each option is separated from its description by at
  # least two spaces (or a line break).
  let doc = """
Usage:
  treediff <left> [<right>] [options]
  treediff (-h | --help)
  treediff (-V | --version)

<left> and <right> represent paths to directory roots to be compared. If one
of these paths points to a file instead of a directory, treediff assumes that
the file represents a saved directory analysis to be loaded in place of a
directory to compare. For example:

    treediff /path/to/dir /path/to/output.json

will analyze the directory tree at '/path/to/dir' to create the left-side
analysis and load a pre-existing analysis from '/path/to/output.json' as the
right-side analysis.

Options:
  -h --help                    Show this usage information.
  -V --version                 Show the program version.
  -v --verbose                 Enable verbose output.
  -q --quiet                   Suppress all output and error messages except for the
                               progress indicator.
  -Q --very-quiet              Suppress all output and error messages including the
                               progress indicator.
  -1 --save-left <left_out>    Save the left analysis to <left_out> (will be
                               formatted as JSON)
  -2 --save-right <right_out>  Save the right analysis to <right_out> (will be
                               formatted as JSON)
  -s --same
  -S --exclude-same
      Show or hide information about files which are the same in both trees.
  -c --content-mismatch
  -C --exclude-content-mismatch
      Show or hide information about files whose relative paths are the same
      in both trees but whose contents differ.
  -p --path-mismatch
  -P --exclude-path-mismatch
      Show or hide information about files whose contents are the same in both
      trees but whose relative paths differ.
  -l --left-only
  -L --exclude-left-only
      Show or hide information about files which are found only in the left
      tree.
  -r --right-only
  -R --exclude-right-only
      Show or hide information about files which are found only in the right
      tree.
"""

  let args = docopt(doc, version = "treediff v1.4.1")

  # NOTE: --verbose is accepted by the parser but not consulted below.
  var verbosity = normal
  if args["--quiet"]: verbosity = quiet
  if args["--very-quiet"]: verbosity = very_quiet

  let progressWrapper = newProgressWrapper(verbosity)

  # Load or perform analysis
  if not args["<left>"]:
    quitWithError("Missing <left> parameter.")

  let leftPath: string = $args["<left>"]

  # A directory is scanned; a regular file is loaded as a saved analysis.
  let loadPath = proc (path: string): DirAnalysis =
    if not path.fileExists and not path.dirExists:
      quitWithError($path & ": no such file or directory.")

    let fileInfo = path.getFileInfo
    if fileInfo.kind == pcDir:
      return analyzeDir(path, progressWrapper)
    elif fileInfo.kind == pcFile:
      loadAnalysis(path, result)
    else:
      quitWithError($path & ": is not a file or directory")

  var leftAnalysis, rightAnalysis: DirAnalysis

  leftAnalysis = loadPath(leftPath)

  # Without a right-hand path, compare against an empty analysis.
  if not args["<right>"]:
    rightAnalysis = (allEntries: @[],
                     byRelPath: newTable[string, ref FileEntry](),
                     byChecksum: newTable[string, seq[ref FileEntry]]())
  else:
    let rightPath: string = $args["<right>"]
    rightAnalysis = loadPath(rightPath)

  # Check for output options
  if args["--save-left"]:
    saveAnalysis($args["--save-left"], leftAnalysis)

  if args["--save-right"] and rightAnalysis.allEntries.len > 0:
    saveAnalysis($args["--save-right"], rightAnalysis)

  # Parse filter options
  var displayOptions: DisplayOptions = (
    left: false, right: false, same: false, content: false, path: false)

  # If none of the explicit selectors are given, assume all are expected.
  if not (args["--left-only"] or args["--right-only"] or
          args["--same"] or args["--content-mismatch"] or
          args["--path-mismatch"]):
    displayOptions = (left: true, right: true, same: true,
                      content: true, path: true)

  # Each --exclude-* flag is applied after its selector, so it wins when both
  # are given.
  if args["--same"]: displayOptions.same = true
  if args["--exclude-same"]: displayOptions.same = false

  if args["--content-mismatch"]: displayOptions.content = true
  if args["--exclude-content-mismatch"]: displayOptions.content = false

  if args["--path-mismatch"]: displayOptions.path = true
  if args["--exclude-path-mismatch"]: displayOptions.path = false

  if args["--left-only"]: displayOptions.left = true
  if args["--exclude-left-only"]: displayOptions.left = false

  if args["--right-only"]: displayOptions.right = true
  if args["--exclude-right-only"]: displayOptions.right = false

  # Display output results
  if verbosity == normal:
    if displayOptions.same:
      let sameEntries = leftAnalysis * rightAnalysis
      for fe in sameEntries: echo "same: ", fe.relPath

    if displayOptions.content:
      let contentsDiffer = samePathDifferentContents(leftAnalysis, rightAnalysis)
      for path in contentsDiffer: echo "contents differ: ", path

    if displayOptions.path:
      let pathsDiffer = sameContentsDifferentPaths(leftAnalysis, rightAnalysis)
      for pair in pathsDiffer:
        echo "paths differ: ", pair.left.relPath, " ", pair.right.relPath

    if displayOptions.left:
      let leftOnly = leftAnalysis - rightAnalysis
      for fe in leftOnly: echo "left only: ", fe.relPath

    if displayOptions.right:
      let rightOnly = rightAnalysis - leftAnalysis
      for fe in rightOnly: echo "right only: ", fe.relPath

View File

@ -1,39 +0,0 @@
import os, docopt, tables, md5, iterutils, re
proc studyDir(root: string, ignore: Iterable[string]): TableRef[string, string] =
  ## Maps each path under `root` (with the `root` prefix removed) to the MD5
  ## of the file's contents, or to "directory" for directory entries. A path
  ## is skipped when it matches any of the regular expressions in `ignore`.
  result = newTable[string, string]()

  for path in walkDirRec(root):
    let relPath = substr(path, len(root))

    # Skip on ANY matching pattern. The fold starts from false and ORs the
    # matches; folding with `and` from true would skip every path when
    # `ignore` is empty.
    if foldl(ignore, proc (acc: bool, it: string): bool = acc or match(relPath, re(it)), false): continue

    let fileInfo = getFileInfo(path)
    if fileInfo.kind == pcFile:
      result.add(relPath, $(toMD5(readFile(path))))
    elif fileInfo.kind == pcDir:
      result.add(relPath, "directory")
when isMainModule:
  # Command-line entry point: study every <path> given on the command line,
  # skipping entries according to the -i patterns, and print each result.
  let doc = """
treediff

Usage:
  treediff [-i <regex>]... [<path>]...
  treediff (-h | --help)
  treediff (-v | --version)

Options:
  -h --help     Show this usage information.
  -v --version  Show the program version.
"""

  let args = docopt(doc, version = "treediff 0.1")
  let roots = @(args["<path>"])

  for root in roots:
    echo "Looking at ", root
    let ignorePatterns = @(args["<regex>"])
    echo studyDir(root, ignorePatterns)
    echo ""
echo ""

View File

@ -1,11 +1,10 @@
[Package]
name = "treeediff"
version = "0.1.0"
# Package
version = "1.4.1"
author = "Jonathan Bernard (jdb@jdb-labs.com)"
description = "Tree Diff"
description = "Utility to generate diffs of full directory trees."
license = "BSD"
bin = @["treediff"]
srcDir = "src/main/nim"
bin = "treediff"
[Deps]
Requires: "nim >= 0.10.0, docopt >= 0.1.0, iterutils >= 0.1.0"
# Dependencies
requires: @["nim >= 0.13.0", "docopt >= 0.1.0", "console_progress >= 1.2.1"]

5
worklog.md Normal file
View File

@ -0,0 +1,5 @@
Current Task
========================================
Currently implementing `countFiles` in
treediff.nim