Compare commits

...

15 Commits
v1.2 ... main

Author SHA1 Message Date
9a73b7f248 Updates for Nim 2.0 2024-08-05 07:02:06 -05:00
ce821d8f53 Change storage format of saved analysis. 2024-02-14 12:20:20 -06:00
de9ff2b54a Update for console_progress version, extract CLI constants. 2023-02-15 07:19:15 -06:00
bd41e2d2f5 Update Nim version of treediff to build against Nim 1.0. 2020-01-05 05:28:19 -06:00
Jonathan Bernard
c59a700cf9 Changed groovy version to localGroovy(), bumped jdb-util version. 2016-12-27 01:32:44 -06:00
Jonathan Bernard
641a562563 Added JDB Labs maven repo to build configuration. 2016-04-21 07:52:57 -05:00
Jonathan Bernard
e49bd4e9c9 Fixed output of nim version: should print file totals before scanning. 2016-02-10 07:28:42 -06:00
Jonathan Bernard
6cc4bf390f Add Nim documentation, parameterize ProgressWrapper output destination. 2016-01-26 10:52:15 -06:00
Jonathan Bernard
3f829ef69e Updated worklog with next steps. 2016-01-26 01:51:22 -06:00
Jonathan Bernard
92d8ed61fc Use newer version of console_progress for bug fix. 2016-01-26 01:32:53 -06:00
Jonathan Bernard
2277dd0828 Advance version number for new implementation. 2016-01-26 00:34:19 -06:00
Jonathan Bernard
9728055c45 Moved Nim sources into src/main/nim. 2016-01-26 00:32:41 -06:00
Jonathan Bernard
c16a5a684d Nim implementation. 2016-01-26 00:29:49 -06:00
Jonathan Bernard
ab99661720 WIP: Porting to Nim. 2016-01-25 09:13:34 -06:00
Jonathan Bernard
c0e3818520 Added usage message. 2015-08-13 12:33:38 -05:00
9 changed files with 516 additions and 56 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
nimcache/
build/
.gradle/
/treediff

View File

@ -3,17 +3,18 @@ apply plugin: "application"
apply plugin: "maven"
group = "com.jdblabs"
version = "1.2"
version = "1.4.4"
mainClassName = "com.jdblabs.file.treediff.TreeDiff"
repositories {
mavenLocal()
mavenCentral() }
mavenCentral()
maven { url "http://mvn.jdb-labs.com/repo" } }
dependencies {
compile 'org.codehaus.groovy:groovy-all:2.4.3'
compile 'com.jdbernard:jdb-util:3.8'
compile 'commons-codec:commons-codec:1.10'
compile localGroovy()
compile 'com.jdbernard:jdb-util:4.+'
compile 'commons-codec:commons-codec:1.10'
compile 'com.fasterxml.jackson.core:jackson-databind:2.4.4'
testCompile 'junit:junit:4.12'

View File

@ -8,7 +8,7 @@ import org.apache.commons.codec.digest.DigestUtils
public class TreeDiff {
public static final String VERSION = "1.2"
public static final String VERSION = "1.4.3"
private ObjectMapper objectMapper = new ObjectMapper()
private PrintStream stdout
@ -54,7 +54,7 @@ public class TreeDiff {
def opts = LightOptionParser.parseOptions(cliDef, args)
if (opts.h) { /* TODO */ return }
if (opts.h) { println this.usage; return }
if (opts.V) {
stdout.println "JDB Labs TreeDiff v${VERSION}"
@ -143,7 +143,7 @@ public class TreeDiff {
if (rootName.startsWith('/')) rightOut = new File(rootName + '.right')
else rightOut = new File(relativeRoot, rootName + '.right')
objectMapper.writeValue(leftOut, left)
objectMapper.writeValue(rightOut, right) }
}
@ -171,7 +171,7 @@ public class TreeDiff {
frame(title: "TreeDif v${VERSION}", show: true) {
boxLayout()
}
}
@ -182,7 +182,7 @@ public class TreeDiff {
}
public static Set<String> samePathDifferentContents(DirAnalysis left, DirAnalysis right) {
return left.allEntries.findAll { l ->
return left.allEntries.findAll { l ->
FileEntry match = right.byRelativePath[l.relativePath]
return match != null && l.checksum != match.checksum }
.collect { it.relativePath } }
@ -280,4 +280,103 @@ public class TreeDiff {
private void verboseErr(String msg) { if (verbose) stderr.println msg }
/**
 * Build the command-line help text shown for the -h/--help option.
 *
 * The text embeds the current {@code VERSION} and lists every supported
 * option. Each "include" option uses a lower-case short flag and its
 * "exclude" counterpart uses the matching upper-case flag.
 *
 * @return the complete usage message.
 */
public String getUsage() {
return """\
JDB Labs TreeDiff v${VERSION}
Gather and display information about the differences between two file trees,
including files found in only one side and not the other, files that match on
both sides, files which share the same contents but reside in differing paths
on each side, and files that reside at the same location on both sides but
whose contents differ.
usage: treediff [options] <left-directory> <right-directory>
where options are:
-h, --help Output this usage information.
-v, --verbose Enable verbose output.
-V, --version Output the version information for the utility.
-g, --gui Launch the graphical interface (not yet implemented).
-s, --same
Output information about files that are the same on both sides.
-S, --exclude-same
Do not output information about files that are the same on both sides.
-c, --content-mismatch
Output information about files that have the same relative path on both
side but whose contents differ.
-C, --exclude-content-mismatch
Do not output information about files that have the same relative path
on both side but whose contents differ.
-p, --path-mismatch
Output information about files that have the same content but reside at
different relative paths on each side.
-P, --exclude-path-mismatch
Do not output information about files that have the same content but
reside at different relative paths on each side.
-l, --left-only
Output information about files found on only the left side (missing
from the right entirely).
-L, --exclude-left-only
Do not output information about files found on the left side only
(missing from the right entirely).
-r, --right-only
Output information about files found on only the right side (missing
from the left entirely).
-R, --exclude-right-only
Do not output information about files found on the right side only
(missing from the left entirely).
-q, --quiet
Suppress all output and error messages except for the progress
indicator.
-Q, --very-quiet
Suppress all output and error messages including the progress
indicator.
-rd, --direction <directory-path>
Use <directory-path> as the root for all relative file paths (input
directories to scan for example).
-i, --analysis-in <left-dir-analysis> <right-dir-analysis>
Use pre-calculated directory analysis in place of reading local
directories. This is useful if you wish to do diffs between two
directory trees that are not on the same filesystem, or if you wish to
display different output about a diff without re-scanning the
filesystem.
-o, --analysis-out <file-name-root>
In addition to the requested output on STDOUT, write the analysis for
each of the scanned directories to files named <file-name-root>.left
and <file-name-root>.right. These analysis files are formatted so that
they can be used as inputs to the --analysis-in option.
""";
}
}

View File

@ -0,0 +1,63 @@
# Program version; combined with the "treediff " prefix for the --version
# output of the command-line tool.
const VERSION* = "2.0.1"

# docopt usage specification for the treediff command-line tool. The text is
# both parsed to define the accepted options and shown to the user as help.
const USAGE* = """
Usage:
treediff <left> [<right>] [options]
treediff (-h | --help)
treediff (-V | --version)
<left> and <right> represent paths to directory roots to be compared. If one
of these paths points to a file instead of a directory, treediff assumes that
the file represents a saved directory analysis to be loaded in place of a
directory to compare. For example:
treediff /path/to/dir /path/to/output.json
will analyze the directory tree at '/path/to/dir' to create the left-side
analysis and load a pre-existing analysis from '/path/to/output.json' as the
right-side analysis.
Options:
-h --help Show this usage information.
-V --version Show the program version.
-v --verbose Enable verbose output.
-q --quiet Suppress all output and error messages except for the
progress indicator.
-Q --very-quiet Suppress all output and error messages including the
progress indicator.
-1 --save-left <left_out> Save the left analysis to <left_out> (will be
formatted as JSON)
-2 --save-right <right_out> Save the right analysis to <right_out> (will be
formatted as JSON)
-s --same
-S --exclude-same
Show or hide information about files which are the same in both trees.
-c --content-mismatch
-C --exclude-content-mismatch
Show or hide information about files whose relative paths are the same
in both trees but whose contents differ.
-p --path-mismatch
-P --exclude-path-mismatch
Show or hide information about files whose contents are the same in both
trees but whose relative paths differ.
-l --left-only
-L --exclude-left-only
Show or hide information about files which are found only in the left
tree.
-r --right-only
-R --exclude-right-only
Show or hide information about files which are found only in the right
tree.
"""

View File

@ -0,0 +1,40 @@
import std/streams
import checksums/md5
proc fileToMD5*(filename: string): string =
  ## Compute the MD5 digest of the file at `filename`, reading it in
  ## fixed-size chunks rather than loading it all at once.
  ##
  ## Returns the string form of the digest. If an `IOError` is raised while
  ## opening or reading, "File not found." is echoed and the string form of
  ## the digest as it stands at that point is returned.
  const chunkBytes: int = 8192 # read files in 8KB chunks
  var
    ctx: MD5Context
    digest: MD5Digest
    input: FileStream

  try:
    input = newFileStream(open(filename))
    md5Init(ctx)
    # Feed the hash one chunk at a time; an empty read marks end of file.
    while true:
      let chunk = input.readStr(chunkBytes)
      if chunk.len == 0: break
      md5Update(ctx, chunk.cstring, chunk.len)
    md5Final(ctx, digest)
  except IOError:
    echo("File not found.")
  finally:
    # The stream is still nil when open() itself failed.
    if not input.isNil:
      input.close()

  result = $digest
when isMainModule:
  # paramCount and commandLineParams live in std/os, which the rest of this
  # module does not need; import it only for the stand-alone entry point.
  import std/os

  # Stand-alone use: print the MD5 of the file named by the first argument.
  if paramCount() > 0:
    let arguments = commandLineParams()
    echo("MD5: ", fileToMD5(arguments[0]))
  else:
    echo("Must pass filename.")
    quit(-1)

284
src/main/nim/treediff.nim Normal file
View File

@ -0,0 +1,284 @@
## Tree Diff
## =========
##
## Utility to compare the file contents of two directory trees.
import std/[json, jsonutils, os, tables, sequtils, strutils]
import docopt
import incremental_md5, console_progress
import ./cliconstants
type
  Verbosity* = enum ## Enum representing the level of output verbosity the tool will emit.
    very_quiet,     ## suppress all output including the progress indicator
    quiet,          ## suppress all output except the progress indicator
    normal          ## emit all output

  ProgressWrapper* = tuple[impl: Progress, verbosity: Verbosity]
    ## Wrapper around a console_progress.Progress, paired with the verbosity
    ## level that decides whether the indicator is used at all.
proc newProgressWrapper*(outFile = stdout, verbosity = normal): ProgressWrapper =
  ## Create a new ProgressWrapper for the given verbosity.
  ##
  ## A progress indicator writing to `outFile` is created unless the
  ## verbosity is `very_quiet`, in which case `impl` is left nil.
  let indicator =
    if verbosity > very_quiet: newProgress(0, outFile)
    else: nil
  result = (impl: indicator, verbosity: verbosity)
proc init(p: ProgressWrapper, root: string, fileCount: int): void =
  ## Announce the tree about to be scanned (normal verbosity only) and set
  ## the progress indicator's maximum to `fileCount` (unless very quiet).
  case p.verbosity
  of very_quiet:
    discard
  of quiet:
    p.impl.setMax(fileCount)
  of normal:
    echo "-- ", root.expandFilename, "\L ", fileCount, " files"
    p.impl.setMax(fileCount)
proc update(p: ProgressWrapper, count: int, file: string): void =
  ## Advance the progress indicator to `count`, labelling it with at most
  ## the last 16 characters of `file`. Does nothing when very quiet.
  if p.verbosity == very_quiet: return
  let tailStart = max(file.high - 15, 0)
  p.impl.updateProgress(count, file[tailStart .. file.high])
proc finish(p: ProgressWrapper): void =
  ## Erase the progress indicator and, at normal verbosity, print the
  ## indicator's maximum as the file total. Does nothing when very quiet.
  if p.verbosity == very_quiet: return
  p.impl.erase
  if p.verbosity == normal:
    echo " ", p.impl.getMax, " files.\L"
proc countFiles(root: string): int =
  ## Count the entries yielded by a recursive walk of `root`.
  for _ in walkDirRec(root):
    inc result
proc getRelPath(ancestor, child: string): string =
  ## Return the path of `child` relative to `ancestor`, prefixed with ".".
  ##
  ## Both paths are expanded to absolute form first. If `child` does not lie
  ## within `ancestor` (a relative path would require backtracking), the
  ## empty string is returned.
  const separators = {DirSep, AltSep}
  let
    ancestorParts = ancestor.expandFilename.split(separators)
    childParts = child.expandFilename.split(separators)

  # An ancestor with more components than the child cannot contain it.
  if ancestorParts.len > childParts.len: return ""

  # Every component of the ancestor must be a prefix of the child.
  for i, part in ancestorParts:
    if part != childParts[i]: return ""

  # Re-join whatever follows the shared prefix, starting from ".".
  result = "."
  for part in childParts[ancestorParts.len .. childParts.high]:
    result = joinPath(result, part)
type
  FileEntry* = ref tuple[relPath, checksum: string]
    ## Data about one file that has been analyzed

  DirAnalysis* = tuple[
    allEntries: seq[FileEntry],
    byRelPath: TableRef[string, FileEntry],
    byChecksum: TableRef[string, seq[FileEntry]]]
    ## Analysis data about one directory tree: every entry, plus lookup
    ## tables keyed by relative path and by checksum.

  DisplayOptions = tuple[left, right, same, content, path: bool]
    ## Consolidated description of which types of results to display.
func `$`(f: FileEntry): string =
  ## Render an entry as "<checksum>: <relative path>".
  result = f.checksum
  result.add ": "
  result.add f.relPath
proc getOrFail(n: JsonNode, key: string, objName: string = ""): JsonNode =
  ## Convenience method to get a key from a JObject or raise an exception.
  ##
  ## Raises `KeyError` when `key` is absent; the message is `objName`
  ## followed by " missing key '<key>'". `KeyError` is a `CatchableError`,
  ## so the failure is caught by `except CatchableError` handlers, which the
  ## bare `Exception` root type is not.
  if not n.hasKey(key):
    raise newException(KeyError, objName & " missing key '" & key & "'")
  return n[key]
proc getIfExists(n: JsonNode, key: string): JsonNode =
  ## Convenience method to get a key from a JObject, yielding a JSON null
  ## node when the key is absent.
  if n.hasKey(key):
    return n[key]
  result = newJNull()
func parseFileEntry(n: JsonNode): FileEntry =
  ## Build a FileEntry from a JSON object holding "relPath" and "checksum".
  ## A missing key is reported by `getOrFail`.
  new(result)
  let path = n.getOrFail("relPath").getStr
  let sum = n.getOrFail("checksum").getStr
  result[] = (relPath: path, checksum: sum)
func initDirAnalysis(): DirAnalysis =
  ## Create an empty analysis: no entries and two fresh, empty lookup tables.
  result.allEntries = @[]
  result.byRelPath = newTable[string, FileEntry]()
  result.byChecksum = newTable[string, seq[FileEntry]]()
func indexEntries(da: var DirAnalysis) =
  ## Populate the `byRelPath` and `byChecksum` tables from `allEntries`.
  ## Entries sharing a checksum are collected in one seq under that checksum.
  for entry in da.allEntries:
    da.byRelPath[entry.relPath] = entry
    da.byChecksum.mgetOrPut(entry.checksum, newSeq[FileEntry]()).add(entry)
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Inspect a directory and analyze all files, noting their relative paths
  ## and the checksum of their contents.
  ##
  ## `progress` is initialised with the number of files found under `root`,
  ## updated after each file has been processed, and finished once the
  ## entries have been indexed.
  let fileCount = countFiles(root)

  # Pass the real file count: init echoes this number to the user, so any
  # padding would be reported as extra files.
  progress.init(root, fileCount)

  result = initDirAnalysis()

  var processed = 0
  for file in walkDirRec(root):
    let md5sum = fileToMd5(file)
    let entry = new(FileEntry)
    entry[] = (relPath: getRelPath(root, file), checksum: md5sum)
    result.allEntries.add(entry)

    # Count the file before reporting so the indicator shows files completed.
    processed += 1
    progress.update(processed, file)

  result.indexEntries
  progress.finish()
proc loadAnalysis*(path: string): DirAnalysis =
  ## Load a previously performed directory analysis from the JSON file at
  ## `path`: each element of the top-level JSON value becomes one entry,
  ## and the lookup tables are rebuilt afterwards.
  let savedEntries = parseJson(readFile(path))
  result = initDirAnalysis()
  for node in savedEntries.items:
    result.allEntries.add(parseFileEntry(node))
  result.indexEntries
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Save a completed analysis: the list of entries is serialised to JSON
  ## and written to `path`.
  let serialized = toJson(analysis.allEntries)
  writeFile(path, $serialized)
proc intersection*(left, right: DirAnalysis): seq[FileEntry] =
  ## Find all ``FileEntry`` that are the same on both sides: matching contents
  ## and paths. The returned entries are those of the left side.
  for item in left.allEntries:
    # nil covers both "no such path on the right" and a nil stored entry.
    let match = right.byRelPath.getOrDefault(item.relPath)
    if match != nil and item.checksum == match.checksum:
      result.add(item)
proc difference*(left, right: DirAnalysis): seq[FileEntry] =
  ## Find all ``FileEntry`` that are present in the left but not present in
  ## the right: neither their path nor their checksum occurs on the right.
  for item in left.allEntries:
    if item.relPath in right.byRelPath: continue
    if item.checksum in right.byChecksum: continue
    result.add(item)
proc `*`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
  ## Alias for `intersection(left, right) <#intersection>`_
  intersection(left, right)
proc `-`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
  ## Alias for `difference(left, right) <#difference>`_
  difference(left, right)
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Find the relative paths that exist in both trees but whose contents
  ## (checksums) differ.
  for item in left.allEntries:
    if item.relPath in right.byRelPath and
        right.byRelPath[item.relPath].checksum != item.checksum:
      result.add(item.relPath)
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: FileEntry]] =
  ## Find all ``FileEntry`` whose contents are the same in both trees but
  ## which are located at different paths. One pair is produced for every
  ## right-side entry that shares a left entry's checksum under another path.
  for item in left.allEntries:
    # A checksum unknown to the right side yields an empty seq here.
    for match in right.byChecksum.getOrDefault(item.checksum):
      if match.relPath != item.relPath:
        result.add((left: item, right: match))
when isMainModule:
  # Command-line entry point: parse arguments, build or load the analysis
  # for each side, optionally save the analyses, then print the requested
  # categories of differences.

  let quitWithError = proc (error: string): void =
    stderr.writeLine("treediff: " & error)
    quit(QuitFailure)

  let args = docopt(USAGE, version = "treediff " & VERSION)

  # --very-quiet is checked last so it wins when both flags are given.
  var verbosity = normal
  if args["--quiet"]: verbosity = quiet
  if args["--very-quiet"]: verbosity = very_quiet

  let progressWrapper = newProgressWrapper(verbosity = verbosity)

  # Load or perform analysis
  if not args["<left>"]:
    quitWithError("Missing <left> parameter.")

  let leftPath: string = $args["<left>"]

  # A directory is scanned; a regular file is read as a saved analysis.
  let loadPath = proc (path: string): DirAnalysis =
    if not path.fileExists and not path.dirExists:
      quitWithError(path & ": no such file or directory.")

    let fileInfo = path.getFileInfo

    if fileInfo.kind == pcDir:
      return analyzeDir(path, progressWrapper)
    elif fileInfo.kind == pcFile:
      result = loadAnalysis(path)
    else:
      quitWithError(path & ": is not a file or directory")

  var leftAnalysis, rightAnalysis: DirAnalysis

  leftAnalysis = loadPath(leftPath)

  # With no right-hand path, compare against an empty analysis.
  if not args["<right>"]:
    rightAnalysis = initDirAnalysis()
  else:
    let rightPath: string = $args["<right>"]
    rightAnalysis = loadPath(rightPath)

  # Check for output options
  if args["--save-left"]:
    saveAnalysis($args["--save-left"], leftAnalysis)

  # An empty right-side analysis is not written out.
  if args["--save-right"] and rightAnalysis.allEntries.len > 0:
    saveAnalysis($args["--save-right"], rightAnalysis)

  # Parse filter options
  var displayOptions: DisplayOptions = (
    left: false, right: false, same: false, content: false, path: false)

  # If none of the explicit selectors are given, assume all are expected.
  if not (args["--left-only"] or args["--right-only"] or
          args["--same"] or args["--content-mismatch"] or
          args["--path-mismatch"]):
    displayOptions = (left: true, right: true, same: true,
                      content: true, path: true)

  # Each exclude flag is applied after its include flag, so exclusion wins.
  if args["--same"]: displayOptions.same = true
  if args["--exclude-same"]: displayOptions.same = false

  if args["--content-mismatch"]: displayOptions.content = true
  if args["--exclude-content-mismatch"]: displayOptions.content = false

  if args["--path-mismatch"]: displayOptions.path = true
  if args["--exclude-path-mismatch"]: displayOptions.path = false

  if args["--left-only"]: displayOptions.left = true
  if args["--exclude-left-only"]: displayOptions.left = false

  if args["--right-only"]: displayOptions.right = true
  if args["--exclude-right-only"]: displayOptions.right = false

  # Display output results
  if verbosity == normal:
    if displayOptions.same:
      let sameEntries = leftAnalysis * rightAnalysis
      for fe in sameEntries: echo "same: ", fe.relPath

    if displayOptions.content:
      let contentsDiffer = samePathDifferentContents(leftAnalysis, rightAnalysis)
      for path in contentsDiffer: echo "contents differ: ", path

    if displayOptions.path:
      let pathsDiffer = sameContentsDifferentPaths(leftAnalysis, rightAnalysis)
      for pair in pathsDiffer:
        echo "paths differ: ", pair.left.relPath, " ", pair.right.relPath

    if displayOptions.left:
      let leftOnly = leftAnalysis - rightAnalysis
      for fe in leftOnly: echo "left only: ", fe.relPath

    if displayOptions.right:
      let rightOnly = rightAnalysis - leftAnalysis
      for fe in rightOnly: echo "right only: ", fe.relPath

View File

@ -1,39 +0,0 @@
import os, docopt, tables, md5, iterutils, re
proc studyDir(root: string, ignore: Iterable[string]): TableRef[string, string] =
  ## Walk `root` recursively and map each path (with the `root` prefix cut
  ## off) to the MD5 of the file's contents, or to "directory".
  ##
  ## A path is skipped when it matches any of the regular expressions in
  ## `ignore`; with no patterns, nothing is skipped.
  result = newTable[string, string]()
  for path in walkDirRec(root):
    var relPath = substr(path, len(root))
    # Fold from false with `or`: folding from true with `and` skipped every
    # path whenever the ignore list was empty.
    if foldl(ignore, proc (acc: bool, it: string): bool = acc or match(relPath, re(it)), false): continue
    var fileInfo = getFileInfo(path)
    if fileInfo.kind == pcFile:
      result.add(relPath, $(toMD5(readFile(path))))
    elif fileInfo.kind == pcDir:
      result.add(relPath, "directory")
when isMainModule:
  # Command-line entry point: parse the arguments with docopt, then study
  # and print every directory named on the command line.
  let doc = """
treediff
Usage:
treediff [-i <regex>]... [<path>]...
treediff (-h | --help)
treediff (-v | --version)
Options:
-h --help Show this usage information.
-v --version Show the program version.
"""
  let args = docopt(doc, version = "treediff 0.1")
  # Each <path> is studied on its own; the <regex> values are passed through
  # to studyDir as the ignore list.
  for root in @(args["<path>"]):
    echo "Looking at ", root
    echo studyDir(root, @(args["<regex>"]))
    echo ""

View File

@ -1,11 +1,16 @@
[Package]
name = "treeediff"
version = "0.1.0"
# Package
version = "2.0.1"
author = "Jonathan Bernard (jdb@jdb-labs.com)"
description = "Tree Diff"
description = "Utility to generate diffs of full directory trees."
license = "BSD"
bin = @["treediff"]
srcDir = "src/main/nim"
bin = "treediff"
# Dependencies
requires: @["nim >= 2.0.0", "docopt == 0.7.1", "checksums"]
[Deps]
Requires: "nim >= 0.10.0, docopt >= 0.1.0, iterutils >= 0.1.0"
# Dependencies from git.jdb-software.com/jdb/nim-packages
requires: @["console_progress >= 1.2.2", "update_nim_package_version"]
task updateVersion, "Update the version of this package.":
exec "update_nim_package_version treediff 'src/main/nim/cliconstants.nim'"

6
worklog.md Normal file
View File

@ -0,0 +1,6 @@
To Do
========================================
* Rework the JSON output format so that
the Groovy and Nim implementations can
read each other's saved analysis.