Nim implementation.
This commit is contained in:
parent
ab99661720
commit
c16a5a684d
44
incremental_md5.nim
Normal file
44
incremental_md5.nim
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import md5
|
||||||
|
import os
|
||||||
|
|
||||||
|
proc fileToMD5*(filename: string) : string =
  ## Computes the MD5 digest of the file at `filename`, reading it in
  ## `blockSize`-byte chunks so the whole file is never held in memory.
  ##
  ## Returns the digest rendered with `$`.
  ##
  ## I/O failures are not propagated: the underlying error message is written
  ## to stderr and the rendering of the untouched (zero-initialized) digest is
  ## returned, because `md5Final` is never reached in that case.

  const blockSize: int = 8192
  var
    c: MD5Context
    d: MD5Digest
    f: File
    bytesRead: int = 0
    buffer: array[blockSize, char]

  try:
    f = open(filename)

    md5Init(c)
    bytesRead = f.readBuffer(buffer.addr, blockSize)

    # Feed each chunk to the hash; the loop ends when a read yields no bytes.
    while bytesRead > 0:
      md5Update(c, buffer, bytesRead)
      bytesRead = f.readBuffer(buffer.addr, blockSize)

    md5Final(c, d)

  except IOError:
    # Report what actually went wrong (missing file, permissions, read
    # error, ...) and keep it off stdout, where callers print results.
    stderr.writeLine(filename & ": " & getCurrentExceptionMsg())
  finally:
    # `f` is still nil when `open` itself failed.
    if f != nil:
      close(f)

  result = $d
|
||||||
|
|
||||||
|
when isMainModule:
  # Command-line entry point: print the MD5 of the file named by the first
  # argument, or complain and exit with -1 when no argument was given.
  let arguments = commandLineParams()

  if arguments.len == 0:
    echo("Must pass filename.")
    quit(-1)

  echo("MD5: ", fileToMD5(arguments[0]))
|
@ -143,7 +143,7 @@ public class TreeDiff {
|
|||||||
|
|
||||||
if (rootName.startsWith('/')) rightOut = new File(rootName + '.right')
|
if (rootName.startsWith('/')) rightOut = new File(rootName + '.right')
|
||||||
else rightOut = new File(relativeRoot, rootName + '.right')
|
else rightOut = new File(relativeRoot, rootName + '.right')
|
||||||
|
|
||||||
objectMapper.writeValue(leftOut, left)
|
objectMapper.writeValue(leftOut, left)
|
||||||
objectMapper.writeValue(rightOut, right) }
|
objectMapper.writeValue(rightOut, right) }
|
||||||
}
|
}
|
||||||
@ -171,7 +171,7 @@ public class TreeDiff {
|
|||||||
frame(title: "TreeDif v${VERSION}", show: true) {
|
frame(title: "TreeDif v${VERSION}", show: true) {
|
||||||
boxLayout()
|
boxLayout()
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -182,7 +182,7 @@ public class TreeDiff {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static Set<String> samePathDifferentContents(DirAnalysis left, DirAnalysis right) {
|
public static Set<String> samePathDifferentContents(DirAnalysis left, DirAnalysis right) {
|
||||||
return left.allEntries.findAll { l ->
|
return left.allEntries.findAll { l ->
|
||||||
FileEntry match = right.byRelativePath[l.relativePath]
|
FileEntry match = right.byRelativePath[l.relativePath]
|
||||||
return match != null && l.checksum != match.checksum }
|
return match != null && l.checksum != match.checksum }
|
||||||
.collect { it.relativePath } }
|
.collect { it.relativePath } }
|
||||||
@ -300,15 +300,15 @@ where options are:
|
|||||||
-g, --gui Launch the graphical interface (not yet implemented).
|
-g, --gui Launch the graphical interface (not yet implemented).
|
||||||
|
|
||||||
-s, --same
|
-s, --same
|
||||||
|
|
||||||
Output information about files that are the same on both sides.
|
Output information about files that are the same on both sides.
|
||||||
|
|
||||||
-S, --exclude-same
|
-S, --exclude-same
|
||||||
|
|
||||||
Do not output information about files that are the same on both sides.
|
Do not output information about files that are the same on both sides.
|
||||||
|
|
||||||
-c, --content-mismatch
|
-c, --content-mismatch
|
||||||
|
|
||||||
Output information about files that have the same relative path on both
|
Output information about files that have the same relative path on both
|
||||||
side but whose contents differ.
|
side but whose contents differ.
|
||||||
|
|
||||||
@ -363,7 +363,7 @@ where options are:
|
|||||||
directories to scan for example).
|
directories to scan for example).
|
||||||
|
|
||||||
-i, --analysis-in <left-dir-analysis> <right-dir-analysis>
|
-i, --analysis-in <left-dir-analysis> <right-dir-analysis>
|
||||||
|
|
||||||
Use pre-calculated directory analysis in place of reading local
|
Use pre-calculated directory analysis in place of reading local
|
||||||
directories. This is useful if you wish to do diffs between two
|
directories. This is useful if you wish to do diffs between two
|
||||||
directory trees that are not on the same filesystem, or if you wish to
|
directory trees that are not on the same filesystem, or if you wish to
|
||||||
|
288
treediff.nim
288
treediff.nim
@ -1,51 +1,281 @@
|
|||||||
import os, docopt, tables, md5, iterutils, re
|
import os, tables, streams, sequtils, strutils, docopt, marshal
|
||||||
|
import incremental_md5, console_progress
|
||||||
|
|
||||||
type
|
type
  # Output level. The declaration order matters: the code compares levels
  # with `>` (e.g. `verbosity > very_quiet`).
  Verbosity* = enum very_quiet, quiet, normal

  # One file: its path relative to the analyzed root and its checksum string.
  FileEntry* = tuple[relPath: string, checksum: string]

  # Result of analyzing a directory tree: every entry found, plus lookup
  # tables keyed by relative path and by checksum. A checksum maps to a list
  # because several files may share the same contents.
  DirAnalysis* = tuple[allEntries: seq[ref FileEntry],
                       byRelPath: ref Table[string, ref FileEntry],
                       byChecksum: ref Table[string, seq[ref FileEntry]]]

  # A progress indicator paired with the verbosity that gates its use.
  # `impl` is nil when the wrapper was created with `very_quiet`.
  ProgressWrapper* = tuple[impl: Progress, verbosity: Verbosity]

  # Flags selecting which result categories are printed.
  DisplayOptions = tuple[left, right, same, content, path: bool]
|
||||||
|
|
||||||
proc analyzeDir(root: string): DirAnalysis =
|
proc init(p: ProgressWrapper, root: string, fileCount: int): void =
  ## Prepares the wrapper for scanning `root`: prints the expanded root path
  ## at normal verbosity and sets the indicator's maximum to `fileCount`
  ## unless the wrapper is very quiet.
  if p.verbosity == normal:
    echo "-- ", expandFilename(root)

  # `very_quiet` is the lowest level, so "anything else" means "above it".
  if p.verbosity != very_quiet:
    setMax(p.impl, fileCount)
|
||||||
|
|
||||||
|
proc update(p: ProgressWrapper, count: int, file: string): void =
  ## Advances the progress indicator to `count`, labelling it with the tail
  ## of `file` (at most its last 16 characters). Does nothing when the
  ## wrapper is very quiet.
  if p.verbosity > very_quiet:
    # Clamp the start index: for paths shorter than 16 characters
    # `file.high - 15` is negative and would be an out-of-range slice start.
    let start = max(0, file.high - 15)
    p.impl.updateProgress(count, file[start..file.high])
|
||||||
|
|
||||||
|
proc finish(p: ProgressWrapper): void =
  ## Erases the progress indicator and prints its maximum followed by
  ## " files." Does nothing when the wrapper is very quiet.
  if p.verbosity == very_quiet: return

  erase(p.impl)
  echo " ", getMax(p.impl), " files.\L"
|
||||||
|
|
||||||
proc countFiles(root: string): int =
|
proc countFiles(root: string): int =
  ## Returns the number of entries yielded by `walkDirRec(root)`.
  result = 0
  for entry in walkDirRec(root):
    inc(result)
|
||||||
|
|
||||||
proc studyDir(root: string, ignore: Iterable[string]): TableRef[string, string] =
|
proc getRelPath(ancestor, child: string): string =
  ## Returns the path of `child` relative to `ancestor`, prefixed with ".".
  ## Returns "" when `child` does not lie under `ancestor`, since no
  ## relative path can be built without backtracking.
  ##
  ## Both paths are passed through `expandFilename` first.
  let
    separators = {DirSep, AltSep}
    ancestorParts = expandFilename(ancestor).split(separators)
    childParts = expandFilename(child).split(separators)

  # A longer ancestor path cannot be a prefix of the child path.
  if childParts.len < ancestorParts.len: return ""

  # Every component of the ancestor must match the child's leading components.
  for i in 0 .. ancestorParts.high:
    if ancestorParts[i] != childParts[i]: return ""

  # Append the child's remaining components to "." one at a time.
  result = "."
  for i in ancestorParts.len .. childParts.high:
    result = joinPath(result, childParts[i])
|
||||||
|
|
||||||
|
proc newProgressWrapper*(verbosity: Verbosity): ProgressWrapper =
  ## Builds a wrapper for the given verbosity. A progress indicator writing
  ## to stdout is created unless the verbosity is `very_quiet`, in which
  ## case `impl` stays nil.
  result.verbosity = verbosity
  if verbosity != very_quiet:
    result.impl = newProgress(stdout, 0)
|
||||||
|
|
||||||
if fileInfo.kind == pcFile:
|
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Walks the tree under `root`, computes the MD5 checksum of every entry
  ## yielded by `walkDirRec`, and returns the entries together with lookup
  ## tables keyed by relative path and by checksum.
  ##
  ## `progress` is initialized with the file count, updated once per file and
  ## finished when the walk completes.
  let fileCount = countFiles(root)

  progress.init(root, fileCount)

  result = (allEntries: @[],
            byRelPath: newTable[string, ref FileEntry](),
            byChecksum: newTable[string, seq[ref FileEntry]]())

  var count = 0
  for file in walkDirRec(root):

    # Compute checksum
    let md5sum = fileToMd5(file)
    var fileEntry: ref FileEntry = new(ref FileEntry)
    fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum )

    # Add to allEntries list
    result.allEntries.add(fileEntry)

    # Add to byRelPath table
    result.byRelPath[fileEntry.relPath] = fileEntry

    # Add to the byChecksum table. The bucket must be looked up by checksum
    # (the table's key); testing the relative path here would recreate the
    # bucket for every file and drop earlier files with the same contents.
    if not result.byChecksum.hasKey(fileEntry.checksum):
      result.byChecksum[fileEntry.checksum] = newSeq[ref FileEntry]()

    result.byChecksum[fileEntry.checksum].add(fileEntry)

    progress.update(count, file)
    count += 1

  progress.finish()
|
||||||
|
|
||||||
|
proc loadAnalysis*(path: string, analysis: var DirAnalysis) =
  ## Reads a saved analysis from the file at `path` into `analysis` using
  ## `marshal.load`.
  ##
  ## Raises `IOError` if the file cannot be opened for reading.
  let inStream: Stream = newFileStream(path, fmRead)

  # newFileStream signals a failed open by returning nil rather than raising.
  if inStream == nil:
    raise newException(IOError, path & ": cannot open file for reading")

  try:
    load(inStream, analysis)
  finally:
    # Release the file handle even if unmarshalling fails.
    close(inStream)
|
||||||
|
|
||||||
|
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Writes `analysis` to the file at `path` using `marshal.store`.
  ##
  ## Raises `IOError` if the file cannot be opened for writing.
  let outStream = newFileStream(path, fmWrite)

  # newFileStream signals a failed open by returning nil rather than raising.
  if outStream == nil:
    raise newException(IOError, path & ": cannot open file for writing")

  try:
    store(outStream, analysis)
  finally:
    # Closing flushes the written data and releases the file handle.
    close(outStream)
|
||||||
|
|
||||||
|
proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Returns the entries of `left` for which `right` has an entry with the
  ## same relative path and the same checksum.
  result = @[]
  for item in left.allEntries:
    if right.byRelPath.hasKey(item.relPath) and
       right.byRelPath[item.relPath].checksum == item.checksum:
      result.add(item)
|
||||||
|
|
||||||
|
proc difference*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Returns the entries of `left` that `right` knows neither by relative
  ## path nor by checksum.
  result = @[]
  for item in left.allEntries:
    if right.byRelPath.hasKey(item.relPath): continue
    if right.byChecksum.hasKey(item.checksum): continue
    result.add(item)
|
||||||
|
|
||||||
|
proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator form of `intersection`.
  intersection(left, right)
|
||||||
|
proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator form of `difference`.
  difference(left, right)
|
||||||
|
|
||||||
|
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Returns the relative paths present in both analyses whose checksums
  ## differ between `left` and `right`.
  result = @[]
  for item in left.allEntries:
    if not right.byRelPath.hasKey(item.relPath): continue
    if right.byRelPath[item.relPath].checksum != item.checksum:
      result.add(item.relPath)
|
||||||
|
|
||||||
|
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] =
  ## Returns every (left, right) pair of entries that share a checksum but
  ## have different relative paths.
  result = @[]
  for item in left.allEntries:
    if right.byChecksum.hasKey(item.checksum):
      for candidate in right.byChecksum[item.checksum]:
        # Same checksum and same path means the file is simply unchanged.
        if candidate.relPath == item.relPath: continue
        result.add((left: item, right: candidate))
|
||||||
|
|
||||||
|
proc quitWithError(error: string): void =
  ## Writes `error`, prefixed with "treediff: ", to stderr and terminates the
  ## program with `QuitFailure`.
  writeLine(stderr, "treediff: ", error)
  quit(QuitFailure)
|
||||||
|
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
|
|
||||||
let doc = """
|
let doc = """
|
||||||
treediff
|
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
treediff [-i <regex>]... [<path>]...
|
treediff <left> [<right>] [options]
|
||||||
treediff (-h | --help)
|
treediff (-h | --help)
|
||||||
treediff (-v | --version)
|
treediff (-V | --version)
|
||||||
|
|
||||||
|
<left> and <right> represent paths to directory roots to be compared. If one
|
||||||
|
of these paths points to a file instead of a directory, treediff assumes that
|
||||||
|
the file represents a saved directory analysis to be loaded in place of a
|
||||||
|
directory to compare. For example:
|
||||||
|
|
||||||
|
treediff /path/to/dir /path/to/output.json
|
||||||
|
|
||||||
|
will analyze the directory tree at '/path/to/dir' to create the left-side
|
||||||
|
analysis and load a pre-existing analysis from '/path/to/output.json' as the
|
||||||
|
right-side analysis.
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
-h --help Show this usage information.
|
-h --help Show this usage information.
|
||||||
-v --version Show the program version.
|
-V --version Show the program version.
|
||||||
|
-v --verbose Enable verbose output.
|
||||||
|
-q --quiet Suppress all output and error messages except for the
|
||||||
|
progress indicator.
|
||||||
|
-Q --very-quiet Suppress all output and error messages including the
|
||||||
|
progress indicator.
|
||||||
|
|
||||||
|
-1 --save-left <left_out> Save the left analysis to <left_out> (will be
|
||||||
|
formatted as JSON)
|
||||||
|
-2 --save-right <right_out> Save the right analysis to <right_out> (will be
|
||||||
|
formatted as JSON)
|
||||||
|
|
||||||
|
-s --same
|
||||||
|
-S --exclude-same
|
||||||
|
|
||||||
|
Show or hide information about files which are the same in both trees.
|
||||||
|
|
||||||
|
-c --content-mismatch
|
||||||
|
-C --exclude-content-mismatch
|
||||||
|
|
||||||
|
Show or hide information about files whose relative paths are the same
|
||||||
|
in both trees but whose contents differ.
|
||||||
|
|
||||||
|
-p --path-mismatch
|
||||||
|
-P --exclude-path-mismatch
|
||||||
|
|
||||||
|
Show or hide information about files whose contents are the same in both
|
||||||
|
trees but whose relative paths differ.
|
||||||
|
|
||||||
|
-l --left-only
|
||||||
|
-L --exclude-left-only
|
||||||
|
|
||||||
|
Show or hide information about files which are found only in the left
|
||||||
|
tree.
|
||||||
|
|
||||||
|
-r --right-only
|
||||||
|
-R --exclude-right-only
|
||||||
|
|
||||||
|
Show or hide information about files which are found only in the right
|
||||||
|
tree.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
let args = docopt(doc, version = "treediff 0.1")
|
let args = docopt(doc, version = "treediff v0.1")
|
||||||
|
|
||||||
for root in @(args["<path>"]):
|
var verbosity = normal
|
||||||
echo "Looking at ", root
|
if args["--quiet"]: verbosity = quiet
|
||||||
|
if args["--very-quiet"]: verbosity = very_quiet
|
||||||
|
let progressWrapper = newProgressWrapper(verbosity)
|
||||||
|
|
||||||
|
# Load or perform analysis
|
||||||
|
if not args["<left>"]:
|
||||||
|
quitWithError("Missing <left> parameter.")
|
||||||
|
|
||||||
|
let leftPath: string = $args["<left>"]
|
||||||
|
|
||||||
|
let loadPath = proc (path: string): DirAnalysis =
|
||||||
|
if not path.fileExists and not path.dirExists:
|
||||||
|
quitWithError($path & ": no such file or directory.")
|
||||||
|
|
||||||
|
let fileInfo = path.getFileInfo
|
||||||
|
if fileInfo.kind == pcDir:
|
||||||
|
return analyzeDir(path, progressWrapper)
|
||||||
|
elif fileInfo.kind == pcFile:
|
||||||
|
loadAnalysis(path, result)
|
||||||
|
else:
|
||||||
|
quitWithError($path & ": is not a file or directory")
|
||||||
|
|
||||||
|
var leftAnalysis, rightAnalysis: DirAnalysis
|
||||||
|
|
||||||
|
leftAnalysis = loadPath(leftPath)
|
||||||
|
|
||||||
|
if not args["<right>"]:
|
||||||
|
rightAnalysis = (allEntries: @[],
|
||||||
|
byRelPath: newTable[string, ref FileEntry](),
|
||||||
|
byChecksum: newTable[string, seq[ref FileEntry]]())
|
||||||
|
else:
|
||||||
|
var rightPath: string = $args["<right>"]
|
||||||
|
rightAnalysis = loadPath(rightPath)
|
||||||
|
|
||||||
|
# Check for output options
|
||||||
|
if args["--save-left"]:
|
||||||
|
saveAnalysis($args["--save-left"], leftAnalysis)
|
||||||
|
|
||||||
|
if args["--save-right"] and rightAnalysis.allEntries.len > 0:
|
||||||
|
saveAnalysis($args["--save-right"], rightAnalysis)
|
||||||
|
|
||||||
|
# Parse filter options
|
||||||
|
var displayOptions: DisplayOptions = (
|
||||||
|
left: false, right: false, same: false, content: false, path: false)
|
||||||
|
|
||||||
|
# If none of the explicit selectors are given, assume all are expected.
|
||||||
|
if not (args["--left-only"] or args["--right-only"] or
|
||||||
|
args["--same"] or args["--content-mismatch"] or
|
||||||
|
args["--path-mismatch"] ):
|
||||||
|
displayOptions = (left: true, right: true, same: true,
|
||||||
|
content: true, path: true)
|
||||||
|
|
||||||
|
if args["--same"]: displayOptions.same = true
|
||||||
|
if args["--exclude-same"]: displayOptions.same = false
|
||||||
|
if args["--content-mismatch"]: displayOptions.content = true
|
||||||
|
if args["--exclude-content-mismatch"]: displayOptions.content = false
|
||||||
|
if args["--path-mismatch"]: displayOptions.path = true
|
||||||
|
if args["--exclude-path-mismatch"]: displayOptions.path = false
|
||||||
|
if args["--left-only"]: displayOptions.left = true
|
||||||
|
if args["--exclude-left-only"]: displayOptions.left = false
|
||||||
|
if args["--right-only"]: displayOptions.right = true
|
||||||
|
if args["--exclude-right-only"]: displayOptions.right = false
|
||||||
|
|
||||||
|
# Display output results
|
||||||
|
if verbosity == normal:
|
||||||
|
if displayOptions.same:
|
||||||
|
let sameEntries = leftAnalysis * rightAnalysis
|
||||||
|
for fe in sameEntries: echo "same: ", fe.relPath
|
||||||
|
|
||||||
|
if displayOptions.content:
|
||||||
|
let contentsDiffer = samePathDifferentContents(leftAnalysis, rightAnalysis)
|
||||||
|
for path in contentsDiffer: echo "contents differ: ", path
|
||||||
|
|
||||||
|
if displayOptions.path:
|
||||||
|
let pathsDiffer = sameContentsDifferentPaths(leftAnalysis, rightAnalysis)
|
||||||
|
for pair in pathsDiffer:
|
||||||
|
echo "paths differ: ", pair.left.relPath, " ", pair.right.relPath
|
||||||
|
|
||||||
|
if displayOptions.left:
|
||||||
|
let leftOnly = leftAnalysis - rightAnalysis
|
||||||
|
for fe in leftOnly: echo "left only: ", fe.relPath
|
||||||
|
|
||||||
|
if displayOptions.right:
|
||||||
|
let rightOnly = rightAnalysis - leftAnalysis
|
||||||
|
for fe in rightOnly: echo "right only: ", fe.relPath
|
||||||
|
|
||||||
echo studyDir(root, @(args["<regex>"]))
|
|
||||||
echo ""
|
|
||||||
|
@ -1,11 +1,9 @@
|
|||||||
[Package]
|
# Package
|
||||||
name = "treeediff"
|
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
author = "Jonathan Bernard (jdb@jdb-labs.com)"
|
author = "Jonathan Bernard (jdb@jdb-labs.com)"
|
||||||
description = "Tree Diff"
|
description = "Utility to generate diffs of full directory trees."
|
||||||
license = "BSD"
|
license = "BSD"
|
||||||
|
bin = @["treediff"]
|
||||||
|
|
||||||
bin = "treediff"
|
# Dependencies
|
||||||
|
requires: @["nim >= 0.13.0", "docopt >= 0.1.0", "console_progress >= 1.2"]
|
||||||
[Deps]
|
|
||||||
Requires: "nim >= 0.10.0, docopt >= 0.1.0, iterutils >= 0.1.0"
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user