5 Commits
v1.3 ... 1.4.1

8 changed files with 347 additions and 56 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
nimcache/
build/
.gradle/
/treediff

View File

@ -3,7 +3,7 @@ apply plugin: "application"
apply plugin: "maven"
group = "com.jdblabs"
version = "1.3"
version = "1.4.1"
mainClassName = "com.jdblabs.file.treediff.TreeDiff"
repositories {

View File

@ -8,7 +8,7 @@ import org.apache.commons.codec.digest.DigestUtils
public class TreeDiff {
public static final String VERSION = "1.3"
public static final String VERSION = "1.4.1"
private ObjectMapper objectMapper = new ObjectMapper()
private PrintStream stdout
@ -143,7 +143,7 @@ public class TreeDiff {
if (rootName.startsWith('/')) rightOut = new File(rootName + '.right')
else rightOut = new File(relativeRoot, rootName + '.right')
objectMapper.writeValue(leftOut, left)
objectMapper.writeValue(rightOut, right) }
}
@ -171,7 +171,7 @@ public class TreeDiff {
frame(title: "TreeDif v${VERSION}", show: true) {
boxLayout()
}
}
@ -182,7 +182,7 @@ public class TreeDiff {
}
public static Set<String> samePathDifferentContents(DirAnalysis left, DirAnalysis right) {
return left.allEntries.findAll { l ->
return left.allEntries.findAll { l ->
FileEntry match = right.byRelativePath[l.relativePath]
return match != null && l.checksum != match.checksum }
.collect { it.relativePath } }
@ -300,15 +300,15 @@ where options are:
-g, --gui Launch the graphical interface (not yet implemented).
-s, --same
Output information about files that are the same on both sides.
-S, --exclude-same
Do not output information about files that are the same on both sides.
-c, --content-mismatch
Output information about files that have the same relative path on both
side but whose contents differ.
@ -363,7 +363,7 @@ where options are:
directories to scan for example).
-i, --analysis-in <left-dir-analysis> <right-dir-analysis>
Use pre-calculated directory analysis in place of reading local
directories. This is useful if you wish to do diffs between two
directory trees that are not on the same filesystem, or if you wish to

View File

@ -0,0 +1,44 @@
import md5
import os
proc fileToMD5*(filename: string) : string =
  ## Computes the MD5 digest of the file at `filename` and returns it in the
  ## string form produced by `$` on an `MD5Digest`.
  ##
  ## The file is read in fixed-size chunks, so it is never held in memory as
  ## a whole.
  ##
  ## If an `IOError` is raised while opening or reading the file, the text
  ## "File not found." is echoed and the digest is never finalised: the
  ## returned string is then the string form of the untouched,
  ## zero-initialised digest rather than a checksum of any file contents.
  const blockSize: int = 8192
  var
    c: MD5Context
    d: MD5Digest
    f: File
    buffer: array[blockSize, char]

  try:
    f = open(filename)
    md5Init(c)

    # Feed the file to the digest one chunk at a time; `readBuffer` reports
    # how many bytes it actually placed in the buffer, and 0 ends the loop.
    var bytesRead = f.readBuffer(buffer.addr, blockSize)
    while bytesRead > 0:
      md5Update(c, buffer, bytesRead)
      bytesRead = f.readBuffer(buffer.addr, blockSize)

    md5Final(c, d)
  except IOError:
    echo("File not found.")
  finally:
    # `f` is still nil when `open` itself failed.
    if f != nil:
      close(f)

  result = $d
when isMainModule:
  # Command-line entry point: print the MD5 of the file named by the first
  # argument. Without an argument, print a hint and exit with status -1.
  if paramCount() < 1:
    echo("Must pass filename.")
    quit(-1)

  let target = commandLineParams()[0]
  echo("MD5: ", fileToMD5(target))

281
src/main/nim/treediff.nim Normal file
View File

@ -0,0 +1,281 @@
import os, tables, streams, sequtils, strutils, docopt, marshal
import incremental_md5, console_progress
type
  # Output levels. The declaration order matters: other code in this module
  # compares values with `>` (e.g. `verbosity > very_quiet`).
  Verbosity* = enum
    very_quiet, quiet, normal

  # One analysed file: its path relative to the scanned root and its checksum.
  FileEntry* = tuple
    relPath: string
    checksum: string

  # The analysis of one directory tree: every entry, plus the same entries
  # indexed by relative path and grouped by checksum.
  DirAnalysis* = tuple
    allEntries: seq[ref FileEntry]
    byRelPath: ref Table[string, ref FileEntry]
    byChecksum: ref Table[string, seq[ref FileEntry]]

  # A progress indicator paired with the verbosity that decides whether it
  # is used at all.
  ProgressWrapper* = tuple
    impl: Progress
    verbosity: Verbosity

  # Which result categories the command-line front end should print.
  DisplayOptions = tuple
    left, right, same, content, path: bool
proc init(p: ProgressWrapper, root: string, fileCount: int): void =
  ## Prepares the progress display for a scan of `root` that will visit
  ## `fileCount` files.
  ##
  ## At `normal` verbosity the expanded root path is echoed first; at any
  ## verbosity above `very_quiet` the indicator's maximum is set to
  ## `fileCount`. At `very_quiet` nothing happens.
  case p.verbosity
  of very_quiet:
    discard
  of quiet:
    p.impl.setMax(fileCount)
  of normal:
    echo "-- ", root.expandFilename
    p.impl.setMax(fileCount)
proc update(p: ProgressWrapper, count: int, file: string): void =
  ## Reports `count` to the wrapped progress indicator, labelled with the
  ## tail of `file`. Does nothing at `very_quiet` verbosity.
  if p.verbosity > very_quiet:
    # Only the last 16 characters of the path are shown. The start index is
    # clamped to 0 so that paths shorter than 16 characters are passed through
    # whole instead of producing a negative (out-of-range) slice start.
    let tailStart = max(0, file.high - 15)
    p.impl.updateProgress(count, file[tailStart..file.high])
proc finish(p: ProgressWrapper): void =
  ## Tears down the progress display once a scan is complete.
  ##
  ## Above `very_quiet` the indicator is erased; at `normal` verbosity a
  ## summary line with the indicator's maximum is echoed as well.
  if p.verbosity == very_quiet:
    return

  p.impl.erase
  if p.verbosity == normal:
    echo " ", p.impl.getMax, " files.\L"
proc countFiles(root: string): int =
  ## Returns the number of entries that `walkDirRec` (with its default
  ## settings) yields beneath `root`.
  result = 0
  for entry in walkDirRec(root):
    inc(result)
proc getRelPath(ancestor, child: string): string =
  ## Returns the path of `child` relative to `ancestor`, prefixed with ".".
  ##
  ## Both paths are expanded with `expandFilename` and split into components
  ## before being compared. An empty string is returned when `ancestor` is
  ## not a component-wise prefix of `child`, because no relative path can be
  ## built without backtracking. When both expand to the same path the
  ## result is just ".".
  let separators = {DirSep, AltSep}
  let baseParts = ancestor.expandFilename.split(separators)
  let leafParts = child.expandFilename.split(separators)

  # An ancestor with more components than the child cannot contain it.
  if baseParts.len > leafParts.len:
    return ""

  # Every ancestor component must equal the child component at the same
  # position; the first mismatch proves the child lies elsewhere.
  for i in 0..baseParts.high:
    if baseParts[i] != leafParts[i]:
      return ""

  # Join whatever remains of the child underneath ".".
  result = "."
  for i in baseParts.len..leafParts.high:
    result = joinPath(result, leafParts[i])
proc newProgressWrapper*(verbosity: Verbosity): ProgressWrapper =
  ## Builds a `ProgressWrapper` for the given verbosity.
  ##
  ## A progress indicator writing to `stdout` is created for any verbosity
  ## above `very_quiet`; otherwise `impl` is left nil.
  result.verbosity = verbosity
  if verbosity > very_quiet:
    result.impl = newProgress(stdout, 0)
  else:
    result.impl = nil
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Walks the tree under `root`, checksums every file found and returns the
  ## entries three ways: as a flat list, indexed by relative path, and
  ## grouped by checksum.
  ##
  ## `progress` is initialised with the file count before the walk, updated
  ## once per file and finished afterwards.
  let fileCount = countFiles(root)

  progress.init(root, fileCount)

  result = (allEntries: @[],
            byRelPath: newTable[string, ref FileEntry](),
            byChecksum: newTable[string, seq[ref FileEntry]]())

  var count = 0
  for file in walkDirRec(root):

    # Compute checksum
    let md5sum = fileToMd5(file)

    var fileEntry: ref FileEntry = new(ref FileEntry)
    fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum )

    # Add to allEntries list
    result.allEntries.add(fileEntry)

    # Add to byRelPath table
    result.byRelPath[fileEntry.relPath] = fileEntry

    # Add to the byChecksum table. The bucket must be looked up by checksum:
    # testing the relative path here would recreate the bucket for every
    # file and drop earlier entries that share the same checksum.
    if not result.byChecksum.hasKey(fileEntry.checksum):
      result.byChecksum[fileEntry.checksum] = newSeq[ref FileEntry]()

    result.byChecksum[fileEntry.checksum].add(fileEntry)

    progress.update(count, file)
    count += 1

  progress.finish()
proc loadAnalysis*(path: string, analysis: var DirAnalysis) =
  ## Reads a previously stored analysis from the file at `path` into
  ## `analysis` using `marshal.load`.
  ##
  ## Raises `IOError` when the file cannot be opened for reading. The stream
  ## is closed again whether or not loading succeeds.
  let inStream: Stream = newFileStream(path, fmRead)

  # `newFileStream` signals failure by returning nil rather than raising.
  if inStream.isNil:
    raise newException(IOError,
      "cannot open analysis file for reading: " & path)

  try:
    load(inStream, analysis)
  finally:
    inStream.close()
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Writes `analysis` to the file at `path` using `marshal.store`.
  ##
  ## Raises `IOError` when the file cannot be opened for writing. The stream
  ## is closed afterwards, whether or not storing succeeds, so that the file
  ## handle is released once the call returns.
  let outStream = newFileStream(path, fmWrite)

  # `newFileStream` signals failure by returning nil rather than raising.
  if outStream.isNil:
    raise newException(IOError,
      "cannot open analysis file for writing: " & path)

  try:
    store(outStream, analysis)
  finally:
    outStream.close()
proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Returns the entries of `left` for which `right` holds an entry with the
  ## same relative path and the same checksum, in `left`'s order.
  result = @[]
  for entry in left.allEntries:
    if right.byRelPath.hasKey(entry.relPath) and
        right.byRelPath[entry.relPath].checksum == entry.checksum:
      result.add(entry)
proc difference*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Returns the entries of `left` that `right` knows neither by relative
  ## path nor by checksum, in `left`'s order.
  result = @[]
  for entry in left.allEntries:
    # A match on either key means the entry is not unique to `left`.
    if right.byRelPath.hasKey(entry.relPath): continue
    if right.byChecksum.hasKey(entry.checksum): continue
    result.add(entry)
proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator spelling of `intersection(left, right)`.
  result = intersection(left, right)
proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator spelling of `difference(left, right)`.
  result = difference(left, right)
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Returns the relative paths that exist in both analyses but whose
  ## checksums differ, in `left`'s order.
  result = @[]
  for entry in left.allEntries:
    if not right.byRelPath.hasKey(entry.relPath): continue
    if right.byRelPath[entry.relPath].checksum != entry.checksum:
      result.add(entry.relPath)
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] =
  ## Pairs every entry of `left` with each entry of `right` that has the
  ## same checksum but a different relative path.
  result = @[]
  for leftEntry in left.allEntries:
    if right.byChecksum.hasKey(leftEntry.checksum):
      for rightEntry in right.byChecksum[leftEntry.checksum]:
        # Identical paths are not a path mismatch; skip them.
        if leftEntry.relPath == rightEntry.relPath: continue
        result.add((left: leftEntry, right: rightEntry))
proc quitWithError(error: string): void =
  ## Writes `error`, prefixed with the program name, as one line to stderr
  ## and terminates the program with `QuitFailure`.
  writeLine(stderr, "treediff: ", error)
  quit(QuitFailure)
when isMainModule:
# Command-line entry point: parse the arguments, obtain the left and right
# analyses, optionally save them, then print the selected comparisons.
# The usage text below is handed to docopt, so it is also the definition of
# the accepted arguments and options.
let doc = """
Usage:
treediff <left> [<right>] [options]
treediff (-h | --help)
treediff (-V | --version)
<left> and <right> represent paths to directory roots to be compared. If one
of these paths points to a file instead of a directory, treediff assumes that
the file represents a saved directory analysis to be loaded in place of a
directory to compare. For example:
treediff /path/to/dir /path/to/output.json
will analyze the directory tree at '/path/to/dir' to create the left-side
analysis and load a pre-existing analysis from '/path/to/output.json' as the
right-side analysis.
Options:
-h --help Show this usage information.
-V --version Show the program version.
-v --verbose Enable verbose output.
-q --quiet Suppress all output and error messages except for the
progress indicator.
-Q --very-quiet Suppress all output and error messages includeing the
progress indicator.
-1 --save-left <left_out> Save the left analysis to <left_out> (will be
formatted as JSON)
-2 --save-right <right_out> Save the right analysis to <right_out> (will be
formatted as JSON)
-s --same
-S --exclude-same
Show or hide information about files which are the same in both trees.
-c --content-mismatch
-C --exclude-content-mismatch
Show or hide information about files whose relative paths are the same
in both trees but whose contents differ.
-p --path-mismatch
-P --exclude-path-mismatch
Show or hide information about files whose contents are the same in both
trees but whose relative paths differ.
-l --left-only
-L --exclude-left-only
Show or hide information about files which are found only in the left
tree.
-r --right-only
-R --exclude-right-only
Show or hide information about files which are found only in the right
tree.
"""
let args = docopt(doc, version = "treediff v1.4.1")
# Verbosity defaults to normal. --very-quiet is checked last, so it wins when
# both quiet flags are given.
# NOTE(review): --verbose appears in the usage text but is never read in this
# block.
var verbosity = normal
if args["--quiet"]: verbosity = quiet
if args["--very-quiet"]: verbosity = very_quiet
let progressWrapper = newProgressWrapper(verbosity)
# Load or perform analysis
if not args["<left>"]:
quitWithError("Missing <left> parameter.")
let leftPath: string = $args["<left>"]
# Turns a path into an analysis: a directory is scanned with analyzeDir, a
# regular file is loaded as a saved analysis with loadAnalysis, and anything
# else ends the program through quitWithError.
let loadPath = proc (path: string): DirAnalysis =
if not path.fileExists and not path.dirExists:
quitWithError($path & ": no such file or directory.")
let fileInfo = path.getFileInfo
if fileInfo.kind == pcDir:
return analyzeDir(path, progressWrapper)
elif fileInfo.kind == pcFile:
loadAnalysis(path, result)
else:
quitWithError($path & ": is not a file or directory")
var leftAnalysis, rightAnalysis: DirAnalysis
leftAnalysis = loadPath(leftPath)
# When <right> is omitted the left side is compared against an empty analysis.
if not args["<right>"]:
rightAnalysis = (allEntries: @[],
byRelPath: newTable[string, ref FileEntry](),
byChecksum: newTable[string, seq[ref FileEntry]]())
else:
var rightPath: string = $args["<right>"]
rightAnalysis = loadPath(rightPath)
# Check for output options
if args["--save-left"]:
saveAnalysis($args["--save-left"], leftAnalysis)
# The right analysis is only written when it contains at least one entry.
if args["--save-right"] and rightAnalysis.allEntries.len > 0:
saveAnalysis($args["--save-right"], rightAnalysis)
# Parse filter options
var displayOptions: DisplayOptions = (
left: false, right: false, same: false, content: false, path: false)
# If none of the explicit selectors are given, assume all are expected.
if not (args["--left-only"] or args["--right-only"] or
args["--same"] or args["--content-mismatch"] or
args["--path-mismatch"] ):
displayOptions = (left: true, right: true, same: true,
content: true, path: true)
# For each category the include flag is applied first and the exclude flag
# second, so the exclude flag wins when both are given.
if args["--same"]: displayOptions.same = true
if args["--exclude-same"]: displayOptions.same = false
if args["--content-mismatch"]: displayOptions.content = true
if args["--exclude-content-mismatch"]: displayOptions.content = false
if args["--path-mismatch"]: displayOptions.path = true
if args["--exclude-path-mismatch"]: displayOptions.path = false
if args["--left-only"]: displayOptions.left = true
if args["--exclude-left-only"]: displayOptions.left = false
if args["--right-only"]: displayOptions.right = true
if args["--exclude-right-only"]: displayOptions.right = false
# Display output results
# Results are only printed at normal verbosity.
if verbosity == normal:
if displayOptions.same:
let sameEntries = leftAnalysis * rightAnalysis
for fe in sameEntries: echo "same: ", fe.relPath
if displayOptions.content:
let contentsDiffer = samePathDifferentContents(leftAnalysis, rightAnalysis)
for path in contentsDiffer: echo "contents differ: ", path
if displayOptions.path:
let pathsDiffer = sameContentsDifferentPaths(leftAnalysis, rightAnalysis)
for pair in pathsDiffer:
echo "paths differ: ", pair.left.relPath, " ", pair.right.relPath
if displayOptions.left:
let leftOnly = leftAnalysis - rightAnalysis
for fe in leftOnly: echo "left only: ", fe.relPath
if displayOptions.right:
let rightOnly = rightAnalysis - leftAnalysis
for fe in rightOnly: echo "right only: ", fe.relPath

View File

@ -1,39 +0,0 @@
import os, docopt, tables, md5, iterutils, re
proc studyDir(root: string, ignore: Iterable[string]): TableRef[string, string] =
  ## Walks the tree under `root` and returns a table keyed by each path with
  ## the leading `len(root)` characters removed.
  ##
  ## Regular files map to the MD5 of their contents; directories map to the
  ## literal string "directory". A path is left out when any of the regular
  ## expressions in `ignore` matches it.
  result = newTable[string, string]()

  for path in walkDirRec(root):
    let relPath = substr(path, len(root))

    # Skip the path if ANY ignore pattern matches. Folding with `and` from
    # `true` would only skip when every pattern matched, and would skip every
    # path when no pattern was supplied at all.
    # NOTE(review): assumes foldl(iterable, fn, initial) from iterutils, as
    # the argument order of this call suggests -- confirm.
    if foldl(ignore, proc (acc: bool, it: string): bool = acc or match(relPath, re(it)), false): continue

    let fileInfo = getFileInfo(path)
    if fileInfo.kind == pcFile:
      result.add(relPath, $(toMD5(readFile(path))))
    elif fileInfo.kind == pcDir:
      result.add(relPath, "directory")
when isMainModule:
# Command-line entry point: study every <path> argument and echo the result.
# The usage text below is handed to docopt, so it is also the definition of
# the accepted arguments and options.
let doc = """
treediff
Usage:
treediff [-i <regex>]... [<path>]...
treediff (-h | --help)
treediff (-v | --version)
Options:
-h --help Show this usage information.
-v --version Show the program version.
"""
let args = docopt(doc, version = "treediff 0.1")
# Each root is studied with the full list of <regex> values as ignore
# patterns; the resulting table is echoed, followed by an empty line.
for root in @(args["<path>"]):
echo "Looking at ", root
echo studyDir(root, @(args["<regex>"]))
echo ""

View File

@ -1,11 +1,10 @@
[Package]
name = "treeediff"
version = "0.1.0"
# Package
version = "1.4.1"
author = "Jonathan Bernard (jdb@jdb-labs.com)"
description = "Tree Diff"
description = "Utility to generate diffs of full directory trees."
license = "BSD"
bin = @["treediff"]
srcDir = "src/main/nim"
bin = "treediff"
[Deps]
Requires: "nim >= 0.10.0, docopt >= 0.1.0, iterutils >= 0.1.0"
# Dependencies
requires: @["nim >= 0.13.0", "docopt >= 0.1.0", "console_progress >= 1.2.1"]

5
worklog.md Normal file
View File

@ -0,0 +1,5 @@
Current Task
========================================
Currently implementing `countFiles` in
treediff.nim