Compare commits

...

7 Commits
1.4.2 ... main

6 changed files with 166 additions and 138 deletions

View File

@ -3,17 +3,18 @@ apply plugin: "application"
apply plugin: "maven"
group = "com.jdblabs"
version = "1.4.2"
version = "1.4.4"
mainClassName = "com.jdblabs.file.treediff.TreeDiff"
repositories {
mavenLocal()
mavenCentral() }
mavenCentral()
maven { url "http://mvn.jdb-labs.com/repo" } }
dependencies {
compile 'org.codehaus.groovy:groovy-all:2.4.3'
compile 'com.jdbernard:jdb-util:3.8'
compile 'commons-codec:commons-codec:1.10'
compile localGroovy()
compile 'com.jdbernard:jdb-util:4.+'
compile 'commons-codec:commons-codec:1.10'
compile 'com.fasterxml.jackson.core:jackson-databind:2.4.4'
testCompile 'junit:junit:4.12'

View File

@ -8,7 +8,7 @@ import org.apache.commons.codec.digest.DigestUtils
public class TreeDiff {
public static final String VERSION = "1.4.2"
public static final String VERSION = "1.4.3"
private ObjectMapper objectMapper = new ObjectMapper()
private PrintStream stdout

View File

@ -0,0 +1,63 @@
# Program version, reported by `treediff --version`.
const VERSION* = "2.0.1"

# docopt usage text for the command-line interface.
#
# Whitespace in this literal is significant to docopt:
#   * usage patterns are indented and the usage section ends at the first
#     blank line, so the explanatory paragraphs are not read as patterns;
#   * every line of the options section is indented (no blank lines inside
#     it), and an option is separated from its description by at least two
#     spaces or by a line break.
const USAGE* = """
Usage:
  treediff <left> [<right>] [options]
  treediff (-h | --help)
  treediff (-V | --version)

<left> and <right> represent paths to directory roots to be compared. If one
of these paths points to a file instead of a directory, treediff assumes that
the file represents a saved directory analysis to be loaded in place of a
directory to compare. For example:

  treediff /path/to/dir /path/to/output.json

will analyze the directory tree at '/path/to/dir' to create the left-side
analysis and load a pre-existing analysis from '/path/to/output.json' as the
right-side analysis.

Options:
  -h --help                    Show this usage information.
  -V --version                 Show the program version.
  -v --verbose                 Enable verbose output.
  -q --quiet                   Suppress all output and error messages except for the
                               progress indicator.
  -Q --very-quiet              Suppress all output and error messages including the
                               progress indicator.
  -1 --save-left <left_out>    Save the left analysis to <left_out> (will be
                               formatted as JSON)
  -2 --save-right <right_out>  Save the right analysis to <right_out> (will be
                               formatted as JSON)
  -s --same
  -S --exclude-same
    Show or hide information about files which are the same in both trees.
  -c --content-mismatch
  -C --exclude-content-mismatch
    Show or hide information about files whose relative paths are the same
    in both trees but whose contents differ.
  -p --path-mismatch
  -P --exclude-path-mismatch
    Show or hide information about files whose contents are the same in both
    trees but whose relative paths differ.
  -l --left-only
  -L --exclude-left-only
    Show or hide information about files which are found only in the left
    tree.
  -r --right-only
  -R --exclude-right-only
    Show or hide information about files which are found only in the right
    tree.
"""

View File

@ -1,37 +1,33 @@
import md5
import os
import std/streams
import checksums/md5
proc fileToMD5*(filename: string) : string =
const blockSize: int = 8192
const blockSize: int = 8192 # read files in 8KB chunks
var
c: MD5Context
d: MD5Digest
f: File
bytesRead: int = 0
buffer: array[blockSize, char]
byteTotal: int = 0
fs: FileStream
buffer: string
#read chunk of file, calling update until all bytes have been read
try:
f = open(filename)
fs = filename.open.newFileStream
md5Init(c)
bytesRead = f.readBuffer(buffer.addr, blockSize)
while bytesRead > 0:
byteTotal += bytesRead
md5Update(c, buffer, bytesRead)
bytesRead = f.readBuffer(buffer.addr, blockSize)
buffer = fs.readStr(blockSize)
while buffer.len > 0:
md5Update(c, buffer.cstring, buffer.len)
buffer = fs.readStr(blockSize)
md5Final(c, d)
except IOError:
echo("File not found.")
except IOError: echo("File not found.")
finally:
if f != nil:
close(f)
if fs != nil:
close(fs)
result = $d
when isMainModule:

View File

@ -3,9 +3,11 @@
##
## Utility to compare the file contents of two directory trees.
import os, tables, streams, sequtils, strutils, docopt, marshal
import std/[json, jsonutils, os, tables, sequtils, strutils]
import docopt
import incremental_md5, console_progress
import ./cliconstants
type
@ -20,16 +22,17 @@ type
proc newProgressWrapper*(outFile = stdout, verbosity = normal): ProgressWrapper =
## Create a new ProgressWrapper for the given verbosity.
if verbosity > very_quiet:
result = (impl: newProgress(outFile, 0), verbosity: verbosity)
result = (impl: newProgress(0, outFile), verbosity: verbosity)
else: result = (impl: nil, verbosity: verbosity)
proc init(p: ProgressWrapper, root: string, fileCount: int): void =
if p.verbosity == normal: echo "-- ", root.expandFilename
if p.verbosity == normal:
echo "-- ", root.expandFilename, "\L ", fileCount, " files"
if p.verbosity > very_quiet: p.impl.setMax(fileCount)
proc update(p: ProgressWrapper, count: int, file: string): void =
if p.verbosity > very_quiet:
p.impl.updateProgress(count, file[(file.high - 15)..file.high])
p.impl.updateProgress(count, file[max(file.high - 15, 0)..file.high])
proc finish(p: ProgressWrapper): void =
if p.verbosity > very_quiet:
@ -62,99 +65,121 @@ proc getRelPath(ancestor, child: string): string =
# build a relative path without backtracking.
if idx != ancestorPath.len: return ""
return foldl(@["."] & childPath[idx..childPath.high], joinPath(a, b))
type
FileEntry* = tuple[relPath: string, checksum: string]
FileEntry* = ref tuple[relPath: string, checksum: string]
## Data about one file that has been analyzed
DirAnalysis* = ## Analysis data about one directory tree.
tuple[allEntries: seq[ref FileEntry],
byRelPath: ref Table[string, ref FileEntry],
byChecksum: ref Table[string, seq[ref FileEntry]]]
tuple[allEntries: seq[FileEntry],
byRelPath: TableRef[string, FileEntry],
byChecksum: TableRef[string, seq[FileEntry]]]
DisplayOptions = tuple[left, right, same, content, path: bool]
## Consolidated description of which types of results to display.
func `$`(f: FileEntry): string =
  ## Render an entry as ``<checksum>: <relPath>``.
  result = f.checksum
  result.add(": ")
  result.add(f.relPath)
proc getOrFail(n: JsonNode, key: string, objName: string = ""): JsonNode =
  ## Return the value stored under `key` in the JSON object `n`.
  ##
  ## Raises `KeyError` when `n` is nil, is not a JSON object, or does not
  ## contain `key`. `objName`, when non-empty, is prepended to the error
  ## message to identify which object was being read.
  # Check the node kind first: `hasKey` asserts that the node is a JObject,
  # so a malformed document would otherwise surface as an assertion defect
  # instead of a catchable error.
  if n.isNil or n.kind != JObject or not n.hasKey(key):
    let subject = if objName.len > 0: objName & " " else: ""
    raise newException(KeyError, subject & "missing key '" & key & "'")
  return n[key]
proc getIfExists(n: JsonNode, key: string): JsonNode =
  ## Look up `key` in the JSON object `n`, yielding a fresh JSON null node
  ## when the key is absent.
  if not n.hasKey(key):
    return newJNull()
  return n[key]
func parseFileEntry(n: JsonNode): FileEntry =
  ## Build a `FileEntry` from a JSON object carrying string-valued
  ## "relPath" and "checksum" keys. A missing key is reported by
  ## `getOrFail`.
  let
    relPath = n.getOrFail("relPath").getStr
    checksum = n.getOrFail("checksum").getStr
  new(result)
  result[] = (relPath: relPath, checksum: checksum)
func initDirAnalysis(): DirAnalysis =
  ## Create an empty analysis: no entries and two newly allocated, empty
  ## lookup tables.
  result.allEntries = newSeq[FileEntry]()
  result.byRelPath = newTable[string, FileEntry]()
  result.byChecksum = newTable[string, seq[FileEntry]]()
func indexEntries(da: var DirAnalysis) =
  ## Fill the `byRelPath` and `byChecksum` lookup tables from
  ## `da.allEntries`.
  ##
  ## The tables are not cleared first. A later entry with the same relative
  ## path replaces the earlier one in `byRelPath`; entries sharing a
  ## checksum accumulate in that checksum's list.
  for entry in da.allEntries:
    da.byRelPath[entry.relPath] = entry
    # Fetch the bucket for this checksum, creating it on first sight.
    da.byChecksum.mgetOrPut(entry.checksum, newSeq[FileEntry]()).add(entry)
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
## Inspect a directory and analyze all files, noting their relative paths and
## checksum of their contents.
let fileCount = countFiles(root)
progress.init(root, fileCount)
progress.init(root, fileCount + 10)
result = (allEntries: @[],
byRelPath: newTable[string, ref FileEntry](),
byChecksum: newTable[string, seq[ref FileEntry]]())
result = initDirAnalysis()
var count = 0
for file in walkDirRec(root):
# Compute checksum
let md5sum = fileToMd5(file)
var fileEntry: ref FileEntry = new(ref FileEntry)
fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum )
# Add to allEntries list, byRelPath table, and byChecksum table
var fileEntry: FileEntry = new(FileEntry)
fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum)
result.allEntries.add(fileEntry)
result.byRelPath[fileEntry.relPath] = fileEntry
if not result.byChecksum.hasKey(fileEntry.relPath):
result.byChecksum[fileEntry.checksum] = newSeq[ref FileEntry]()
result.byChecksum[fileEntry.checksum].add(fileEntry)
progress.update(count, file)
count += 1
result.indexEntries
count += 10
progress.finish()
proc loadAnalysis*(path: string, analysis: var DirAnalysis) =
proc loadAnalysis*(path: string): DirAnalysis =
## Load a previously performed directory analysis.
let inStream: Stream = newFileStream(path, fmRead)
load(inStream, analysis)
let allEntriesJson = parseJson(readFile(path))
result = initDirAnalysis()
result.allEntries = toSeq(items(allEntriesJson)).map(parseFileEntry)
result.indexEntries
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
## Save a completed analysis.
let outStream = newFileStream(path, fmWrite)
store(outStream, analysis)
writeFile(path, $(analysis.allEntries.toJson))
proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] =
proc intersection*(left, right: DirAnalysis): seq[FileEntry] =
## Find all ``FileEntry`` that are the same on both sides: matching contents
## and paths.
return left.allEntries.filter do (item: ref FileEntry) -> bool:
return left.allEntries.filter do (item: FileEntry) -> bool:
if not right.byRelPath.hasKey(item.relPath): return false
let match = right.byRelPath[item.relPath]
if match == nil: return false
return item.checksum == match.checksum
proc difference*(left, right: DirAnalysis): seq[ref FileEntry] =
proc difference*(left, right: DirAnalysis): seq[FileEntry] =
## Find all ``FileEntry`` that are present in the left but not present in
## the right.
return left.allEntries.filter do (item: ref FileEntry) -> bool:
return left.allEntries.filter do (item: FileEntry) -> bool:
return not right.byRelPath.hasKey(item.relPath) and
not right.byChecksum.hasKey(item.checksum)
proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
proc `*`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
## Alias for `intersection(left, right) <#intersection>`_
return intersection(left, right)
proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
proc `-`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
## Alias for `difference(left, right) <#difference>`_
return difference(left, right)
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
## Find all ``FileEntry`` that have the same paths in both trees but whose
## contents differ.
let matchingEntries = left.allEntries.filter do (item: ref FileEntry) -> bool:
let matchingEntries = left.allEntries.filter do (item: FileEntry) -> bool:
if not right.byRelPath.hasKey(item.relPath): return false
let match = right.byRelPath[item.relPath]
return item.checksum != match.checksum
return matchingEntries.map(proc(item: ref FileEntry): string = return item.relPath)
return matchingEntries.map(proc(item: FileEntry): string = return item.relPath)
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] =
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: FileEntry]] =
## Find all ``FileEntry`` whose contents are the same in both trees but
## which are located at different paths.
result = @[]
@ -169,69 +194,7 @@ when isMainModule:
stderr.writeLine("treediff: " & error)
quit(QuitFailure)
let doc = """
Usage:
treediff <left> [<right>] [options]
treediff (-h | --help)
treediff (-V | --version)
<left> and <right> represent paths to directory roots to be compared. If one
of these paths points to a file instead of a directory, treediff assumes that
the file represents a saved directory analysis to be loaded in place of a
directory to compare. For example:
treediff /path/to/dir /path/to/output.json
will analyze the directory tree at '/path/to/dir' to create the left-side
analysis and load a pre-existing analysis from '/path/to/output.json' as the
right-side analysis.
Options:
-h --help Show this usage information.
-V --version Show the program version.
-v --verbose Enable verbose output.
-q --quiet Suppress all output and error messages except for the
progress indicator.
-Q --very-quiet Suppress all output and error messages includeing the
progress indicator.
-1 --save-left <left_out> Save the left analysis to <left_out> (will be
formatted as JSON)
-2 --save-right <right_out> Save the right analysis to <right_out> (will be
formatted as JSON)
-s --same
-S --exclude-same
Show or hide information about files which are the same in both trees.
-c --content-mismatch
-C --exclude-content-mismatch
Show or hide information about files whose relative paths are the same
in both trees but whose contents differ.
-p --path-mismatch
-P --exclude-path-mismatch
Show or hide information about files whose contents are the same in both
trees but whose relative paths differ.
-l --left-only
-L --exclude-left-only
Show or hide information about files which are found only in the left
tree.
-r --right-only
-R --exclude-right-only
Show or hide information about files which are found only in the right
tree.
"""
let args = docopt(doc, version = "treediff v1.4.2")
let args = docopt(USAGE, version = "treediff " & VERSION)
var verbosity = normal
if args["--quiet"]: verbosity = quiet
@ -252,7 +215,7 @@ Options:
if fileInfo.kind == pcDir:
return analyzeDir(path, progressWrapper)
elif fileInfo.kind == pcFile:
loadAnalysis(path, result)
result = loadAnalysis(path)
else:
quitWithError($path & ": is not a file or directory")
@ -262,8 +225,8 @@ Options:
if not args["<right>"]:
rightAnalysis = (allEntries: @[],
byRelPath: newTable[string, ref FileEntry](),
byChecksum: newTable[string, seq[ref FileEntry]]())
byRelPath: newTable[string, FileEntry](),
byChecksum: newTable[string, seq[FileEntry]]())
else:
var rightPath: string = $args["<right>"]
rightAnalysis = loadPath(rightPath)
@ -274,7 +237,7 @@ Options:
if args["--save-right"] and rightAnalysis.allEntries.len > 0:
saveAnalysis($args["--save-right"], rightAnalysis)
# Parse filter options
var displayOptions: DisplayOptions = (
left: false, right: false, same: false, content: false, path: false)
@ -319,4 +282,3 @@ Options:
if displayOptions.right:
let rightOnly = rightAnalysis - leftAnalysis
for fe in rightOnly: echo "right only: ", fe.relPath

View File

@ -1,5 +1,5 @@
# Package
version = "1.4.2"
version = "2.0.1"
author = "Jonathan Bernard (jdb@jdb-labs.com)"
description = "Utility to generate diffs of full directory trees."
license = "BSD"
@ -7,4 +7,10 @@ bin = @["treediff"]
srcDir = "src/main/nim"
# Dependencies
requires: @["nim >= 0.13.0", "docopt >= 0.1.0", "console_progress >= 1.2.1"]
requires: @["nim >= 2.0.0", "docopt == 0.7.1", "checksums"]
# Dependencies from git.jdb-software.com/jdb/nim-packages
requires: @["console_progress >= 1.2.2", "update_nim_package_version"]
# Runs the external `update_nim_package_version` command, passing it the
# package name and the path of the module that holds the version constant.
# NOTE(review): presumably this keeps that constant in sync with the package
# version declared in this file; confirm against the tool's documentation.
task updateVersion, "Update the version of this package.":
  exec("update_nim_package_version treediff 'src/main/nim/cliconstants.nim'")