Nim implementation.

This commit is contained in:
Jonathan Bernard 2016-01-26 00:29:49 -06:00
parent ab99661720
commit c16a5a684d
4 changed files with 315 additions and 43 deletions

44
incremental_md5.nim Normal file
View File

@ -0,0 +1,44 @@
import md5
import os
proc fileToMD5*(filename: string) : string =
  ## Computes the MD5 digest of the file at `filename`, reading it in
  ## fixed-size chunks so the whole file never has to be held in memory.
  ##
  ## Returns the digest rendered with `$`. If an `IOError` is raised while
  ## opening or reading the file, the error is reported on stderr and the
  ## string form of a digest that was never finalized is returned.
  const blockSize: int = 8192
  var
    c: MD5Context
    d: MD5Digest
    f: File
    bytesRead: int = 0
    buffer: array[blockSize, char]

  try:
    f = open(filename)
    md5Init(c)
    # Feed the hash one chunk at a time; the loop ends once a read returns
    # no bytes.
    bytesRead = f.readBuffer(buffer.addr, blockSize)
    while bytesRead > 0:
      md5Update(c, buffer, bytesRead)
      bytesRead = f.readBuffer(buffer.addr, blockSize)
    md5Final(c, d)
  except IOError:
    # An IOError is not necessarily a missing file, so report the actual
    # reason together with the path, and keep it off stdout.
    stderr.writeLine(filename & ": " & getCurrentExceptionMsg())
  finally:
    # f is still nil when open() itself failed.
    if f != nil:
      close(f)
  result = $d
when isMainModule:
  # Command-line entry point: print the MD5 of the file named by the first
  # argument, or complain and exit when no argument was given.
  if paramCount() < 1:
    echo("Must pass filename.")
    quit(-1)
  else:
    let cliArgs = commandLineParams()
    let target = cliArgs[0]
    echo("MD5: ", fileToMD5(target))

View File

@ -143,7 +143,7 @@ public class TreeDiff {
if (rootName.startsWith('/')) rightOut = new File(rootName + '.right')
else rightOut = new File(relativeRoot, rootName + '.right')
objectMapper.writeValue(leftOut, left)
objectMapper.writeValue(rightOut, right) }
}
@ -171,7 +171,7 @@ public class TreeDiff {
frame(title: "TreeDif v${VERSION}", show: true) {
boxLayout()
}
}
@ -182,7 +182,7 @@ public class TreeDiff {
}
public static Set<String> samePathDifferentContents(DirAnalysis left, DirAnalysis right) {
return left.allEntries.findAll { l ->
return left.allEntries.findAll { l ->
FileEntry match = right.byRelativePath[l.relativePath]
return match != null && l.checksum != match.checksum }
.collect { it.relativePath } }
@ -300,15 +300,15 @@ where options are:
-g, --gui Launch the graphical interface (not yet implemented).
-s, --same
Output information about files that are the same on both sides.
-S, --exclude-same
Do not output information about files that are the same on both sides.
-c, --content-mismatch
Output information about files that have the same relative path on both
side but whose contents differ.
@ -363,7 +363,7 @@ where options are:
directories to scan for example).
-i, --analysis-in <left-dir-analysis> <right-dir-analysis>
Use pre-calculated directory analysis in place of reading local
directories. This is useful if you wish to do diffs between two
directory trees that are not on the same filesystem, or if you wish to

View File

@ -1,51 +1,281 @@
import os, docopt, tables, md5, iterutils, re
import os, tables, streams, sequtils, strutils, docopt, marshal
import incremental_md5, console_progress
type
Verbosity* = enum very_quiet, quiet, normal
FileEntry* = tuple[relPath: string, checksum: string]
DirAnalysis* = tuple[allEntries: seq[FileEntry],
byRelPath: Table[string, FileEntry],
byChecksum: Table[string, FileEntry]]
DirAnalysis* = tuple[allEntries: seq[ref FileEntry],
byRelPath: ref Table[string, ref FileEntry],
byChecksum: ref Table[string, seq[ref FileEntry]]]
ProgressWrapper* = tuple[impl: Progress, verbosity: Verbosity]
DisplayOptions = tuple[left, right, same, content, path: bool]
proc analyzeDir(root: string): DirAnalysis =
let fileCount = countFiles(root)
proc init(p: ProgressWrapper, root: string, fileCount: int): void =
  ## Prepares progress reporting for a scan of `root`: prints the expanded
  ## root path at normal verbosity and sets the indicator's maximum to
  ## `fileCount` at every verbosity except very_quiet.
  case p.verbosity
  of normal:
    echo "-- ", expandFilename(root)
    p.impl.setMax(fileCount)
  of quiet:
    p.impl.setMax(fileCount)
  of very_quiet:
    discard
proc update(p: ProgressWrapper, count: int, file: string): void =
  ## Moves the progress indicator to `count`, labelled with the trailing
  ## characters of `file` (at most 16). Does nothing at very_quiet verbosity.
  if p.verbosity > very_quiet:
    # Clamp the start index at 0: a path shorter than 16 characters would
    # otherwise yield a negative index and an out-of-range slice.
    let tail = file.substr(max(0, file.len - 16))
    p.impl.updateProgress(count, tail)
proc finish(p: ProgressWrapper): void =
  ## Erases the progress indicator and prints the indicator's maximum as the
  ## file total. Does nothing at very_quiet verbosity.
  if p.verbosity <= very_quiet:
    return
  erase(p.impl)
  echo " ", getMax(p.impl), " files.\L"
proc countFiles(root: string): int =
  ## Returns the number of entries `walkDirRec` yields beneath `root`.
  # TODO
  for entry in walkDirRec(root):
    inc(result)
proc studyDir(root: string, ignore: Iterable[string]): TableRef[string, string] =
result = newTable[string, string]()
proc getRelPath(ancestor, child: string): string =
let ancestorPath = ancestor.expandFilename.split({DirSep, AltSep})
let childPath = child.expandFilename.split({DirSep, AltSep})
for path in walkDirRec(root):
var relPath = substr(path, len(root))
# If the ancestor path is longer it cannot contain the child path and we
# cannot construct a relative path without backtracking.
if (ancestorPath.len > childPath.len): return ""
if foldl(ignore, proc (acc: bool, it: string): bool = acc and match(relPath, re(it)), true): continue
# Compare the ancestor and child path up until the end of the ancestor path.
var idx = 0
while idx < ancestorPath.len and ancestorPath[idx] == childPath[idx]: idx += 1
var fileInfo = getFileInfo(path)
# If we stopped before reaching the end of the ancestor path it must be that
# the paths do not match. The ancestor cannot contain the child and we cannot
# build a relative path without backtracking.
if idx != ancestorPath.len: return ""
return foldl(@["."] & childPath[idx..childPath.high], joinPath(a, b))
proc newProgressWrapper*(verbosity: Verbosity): ProgressWrapper =
  ## Builds the wrapper used for progress reporting at `verbosity`. A
  ## stdout-backed indicator is created unless the verbosity is very_quiet,
  ## in which case `impl` is left nil.
  if not (verbosity > very_quiet):
    return (impl: nil, verbosity: verbosity)
  return (impl: newProgress(stdout, 0), verbosity: verbosity)
if fileInfo.kind == pcFile:
result.add(relPath, $(toMD5(readFile(path))))
elif fileInfo.kind == pcDir:
result.add(relPath, "directory")
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Walks every file under `root`, computes its MD5 checksum and indexes the
  ## resulting entries three ways: in walk order (`allEntries`), by relative
  ## path (`byRelPath`) and by checksum (`byChecksum`).
  ##
  ## `progress` is initialised with the file count, updated once per file and
  ## finished when the walk completes.
  let fileCount = countFiles(root)
  progress.init(root, fileCount)

  result = (allEntries: @[],
            byRelPath: newTable[string, ref FileEntry](),
            byChecksum: newTable[string, seq[ref FileEntry]]())

  var count = 0
  for file in walkDirRec(root):
    # Compute checksum
    let md5sum = fileToMd5(file)

    var fileEntry: ref FileEntry = new(ref FileEntry)
    fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum)

    # Add to allEntries list
    result.allEntries.add(fileEntry)

    # Add to byRelPath table
    result.byRelPath[fileEntry.relPath] = fileEntry

    # Add to the byChecksum table. The table is keyed by checksum, so the
    # bucket's existence has to be tested with the checksum; testing the
    # relative path would not match and would reset the bucket on every
    # file, dropping earlier entries that share the checksum.
    if not result.byChecksum.hasKey(fileEntry.checksum):
      result.byChecksum[fileEntry.checksum] = newSeq[ref FileEntry]()
    result.byChecksum[fileEntry.checksum].add(fileEntry)

    progress.update(count, file)
    count += 1

  progress.finish()
proc loadAnalysis*(path: string, analysis: var DirAnalysis) =
  ## Reads a previously stored analysis from the file at `path` into
  ## `analysis` using `load`.
  ##
  ## Raises `IOError` when the file cannot be opened.
  let inStream: Stream = newFileStream(path, fmRead)
  # newFileStream signals a failed open by returning nil rather than raising.
  if inStream == nil:
    raise newException(IOError, path & ": unable to open for reading")
  try:
    load(inStream, analysis)
  finally:
    # Release the file handle even if loading fails.
    inStream.close()
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Writes `analysis` to the file at `path` using `store`.
  ##
  ## Raises `IOError` when the file cannot be opened for writing.
  let outStream = newFileStream(path, fmWrite)
  # newFileStream signals a failed open by returning nil rather than raising.
  if outStream == nil:
    raise newException(IOError, path & ": unable to open for writing")
  try:
    store(outStream, analysis)
  finally:
    # Closing the stream releases the handle and makes sure the written data
    # is not left sitting in a buffer.
    outStream.close()
proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Entries of `left` for which `right` holds an entry with the same
  ## relative path and an equal checksum.
  result = @[]
  for entry in left.allEntries:
    if right.byRelPath.hasKey(entry.relPath) and
        right.byRelPath[entry.relPath].checksum == entry.checksum:
      result.add(entry)
proc difference*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Entries of `left` whose relative path is not a key of `right.byRelPath`
  ## and whose checksum is not a key of `right.byChecksum`.
  result = @[]
  for entry in left.allEntries:
    if right.byRelPath.hasKey(entry.relPath): continue
    if right.byChecksum.hasKey(entry.checksum): continue
    result.add(entry)
proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator spelling of `intersection(left, right)`.
  intersection(left, right)
proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator spelling of `difference(left, right)`.
  difference(left, right)
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Relative paths that occur on both sides but whose checksums differ.
  result = @[]
  for entry in left.allEntries:
    if not right.byRelPath.hasKey(entry.relPath): continue
    if right.byRelPath[entry.relPath].checksum != entry.checksum:
      result.add(entry.relPath)
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] =
  ## Pairs each `left` entry with every `right` entry that has the same
  ## checksum but a different relative path.
  result = @[]
  for mine in left.allEntries:
    if right.byChecksum.hasKey(mine.checksum):
      for theirs in right.byChecksum[mine.checksum]:
        if theirs.relPath == mine.relPath: continue
        result.add((left: mine, right: theirs))
proc quitWithError(error: string): void =
  ## Prints `error` to stderr behind a "treediff: " prefix, then terminates
  ## the program with QuitFailure.
  let message = "treediff: " & error
  writeLine(stderr, message)
  quit(QuitFailure)
when isMainModule:
let doc = """
treediff
let doc = """
Usage:
treediff [-i <regex>]... [<path>]...
treediff <left> [<right>] [options]
treediff (-h | --help)
treediff (-v | --version)
treediff (-V | --version)
<left> and <right> represent paths to directory roots to be compared. If one
of these paths points to a file instead of a directory, treediff assumes that
the file represents a saved directory analysis to be loaded in place of a
directory to compare. For example:
treediff /path/to/dir /path/to/output.json
will analyze the directory tree at '/path/to/dir' to create the left-side
analysis and load a pre-existing analysis from '/path/to/output.json' as the
right-side analysis.
Options:
-h --help Show this usage information.
-v --version Show the program version.
-h --help Show this usage information.
-V --version Show the program version.
-v --verbose Enable verbose output.
-q --quiet Suppress all output and error messages except for the
progress indicator.
-Q --very-quiet Suppress all output and error messages includeing the
progress indicator.
-1 --save-left <left_out> Save the left analysis to <left_out> (will be
formatted as JSON)
-2 --save-right <right_out> Save the right analysis to <right_out> (will be
formatted as JSON)
-s --same
-S --exclude-same
Show or hide information about files which are the same in both trees.
-c --content-mismatch
-C --exclude-content-mismatch
Show or hide information about files whose relative paths are the same
in both trees but whose contents differ.
-p --path-mismatch
-P --exclude-path-mismatch
Show or hide information about files whose contents are the same in both
trees but whose relative paths differ.
-l --left-only
-L --exclude-left-only
Show or hide information about files which are found only in the left
tree.
-r --right-only
-R --exclude-right-only
Show or hide information about files which are found only in the right
tree.
"""
let args = docopt(doc, version = "treediff 0.1")
let args = docopt(doc, version = "treediff v0.1")
for root in @(args["<path>"]):
echo "Looking at ", root
var verbosity = normal
if args["--quiet"]: verbosity = quiet
if args["--very-quiet"]: verbosity = very_quiet
let progressWrapper = newProgressWrapper(verbosity)
# Load or perform analysis
if not args["<left>"]:
quitWithError("Missing <left> parameter.")
let leftPath: string = $args["<left>"]
let loadPath = proc (path: string): DirAnalysis =
if not path.fileExists and not path.dirExists:
quitWithError($path & ": no such file or directory.")
let fileInfo = path.getFileInfo
if fileInfo.kind == pcDir:
return analyzeDir(path, progressWrapper)
elif fileInfo.kind == pcFile:
loadAnalysis(path, result)
else:
quitWithError($path & ": is not a file or directory")
var leftAnalysis, rightAnalysis: DirAnalysis
leftAnalysis = loadPath(leftPath)
if not args["<right>"]:
rightAnalysis = (allEntries: @[],
byRelPath: newTable[string, ref FileEntry](),
byChecksum: newTable[string, seq[ref FileEntry]]())
else:
var rightPath: string = $args["<right>"]
rightAnalysis = loadPath(rightPath)
# Check for output options
if args["--save-left"]:
saveAnalysis($args["--save-left"], leftAnalysis)
if args["--save-right"] and rightAnalysis.allEntries.len > 0:
saveAnalysis($args["--save-right"], rightAnalysis)
# Parse filter options
var displayOptions: DisplayOptions = (
left: false, right: false, same: false, content: false, path: false)
# If none of the explicit selectors are given, assume all are expected.
if not (args["--left-only"] or args["--right-only"] or
args["--same"] or args["--content-mismatch"] or
args["--path-mismatch"] ):
displayOptions = (left: true, right: true, same: true,
content: true, path: true)
if args["--same"]: displayOptions.same = true
if args["--exclude-same"]: displayOptions.same = false
if args["--content-mismatch"]: displayOptions.content = true
if args["--exclude-content-mismatch"]: displayOptions.content = false
if args["--path-mismatch"]: displayOptions.path = true
if args["--exclude-path-mismatch"]: displayOptions.path = false
if args["--left-only"]: displayOptions.left = true
if args["--exclude-left-only"]: displayOptions.left = false
if args["--right-only"]: displayOptions.right = true
if args["--exclude-right-only"]: displayOptions.right = false
# Display output results
if verbosity == normal:
if displayOptions.same:
let sameEntries = leftAnalysis * rightAnalysis
for fe in sameEntries: echo "same: ", fe.relPath
if displayOptions.content:
let contentsDiffer = samePathDifferentContents(leftAnalysis, rightAnalysis)
for path in contentsDiffer: echo "contents differ: ", path
if displayOptions.path:
let pathsDiffer = sameContentsDifferentPaths(leftAnalysis, rightAnalysis)
for pair in pathsDiffer:
echo "paths differ: ", pair.left.relPath, " ", pair.right.relPath
if displayOptions.left:
let leftOnly = leftAnalysis - rightAnalysis
for fe in leftOnly: echo "left only: ", fe.relPath
if displayOptions.right:
let rightOnly = rightAnalysis - leftAnalysis
for fe in rightOnly: echo "right only: ", fe.relPath
echo studyDir(root, @(args["<regex>"]))
echo ""

View File

@ -1,11 +1,9 @@
[Package]
name = "treeediff"
# Package
version = "0.1.0"
author = "Jonathan Bernard (jdb@jdb-labs.com)"
description = "Tree Diff"
description = "Utility to generate diffs of full directory trees."
license = "BSD"
bin = @["treediff"]
bin = "treediff"
[Deps]
Requires: "nim >= 0.10.0, docopt >= 0.1.0, iterutils >= 0.1.0"
# Dependencies
requires: @["nim >= 0.13.0", "docopt >= 0.1.0", "console_progress >= 1.2"]