Nim implementation.

This commit is contained in:
Jonathan Bernard 2016-01-26 00:29:49 -06:00
parent ab99661720
commit c16a5a684d
4 changed files with 315 additions and 43 deletions

44
incremental_md5.nim Normal file
View File

@ -0,0 +1,44 @@
import md5
import os
proc fileToMD5*(filename: string) : string =
  ## Feeds the contents of `filename` to MD5 one fixed-size chunk at a time
  ## and returns the resulting digest rendered with `$`.
  ##
  ## When an `IOError` is raised while opening or reading the file,
  ## "File not found." is echoed and the string form of the untouched digest
  ## variable is returned.
  const chunkSize = 8192
  var
    ctx: MD5Context
    digest: MD5Digest
    input: File
    chunk: array[chunkSize, char]
  try:
    input = open(filename)
    md5Init(ctx)
    # Keep pulling chunks until a read comes back empty.
    while true:
      let got = input.readBuffer(chunk.addr, chunkSize)
      if got <= 0: break
      md5Update(ctx, chunk, got)
    md5Final(ctx, digest)
  except IOError:
    echo("File not found.")
  finally:
    # `input` stays nil when `open` itself failed.
    if input != nil:
      close(input)
  result = $digest
when isMainModule:
  # Command-line entry point: print the MD5 of the file named by the first
  # argument, or complain and exit with -1 when no argument was given.
  if paramCount() <= 0:
    echo("Must pass filename.")
    quit(-1)
  else:
    echo("MD5: ", fileToMD5(paramStr(1)))

View File

@ -1,51 +1,281 @@
import os, docopt, tables, md5, iterutils, re
import os, tables, streams, sequtils, strutils, docopt, marshal
import incremental_md5, console_progress
type
# Output levels. The declaration order (very_quiet < quiet < normal) is what
# lets the progress helpers in this file compare levels with `>`.
Verbosity* = enum very_quiet, quiet, normal
# One analysed file: its path relative to the analysed root and its checksum
# as a string.
FileEntry* = tuple[relPath: string, checksum: string]
# NOTE(review): `DirAnalysis` is declared twice in this span. The first form
# stores entries by value with one entry per checksum; the second stores
# `ref FileEntry` values and a seq of entries per checksum. `analyzeDir` in
# this file builds the second form.
DirAnalysis* = tuple[allEntries: seq[FileEntry],
byRelPath: Table[string, FileEntry],
byChecksum: Table[string, FileEntry]]
DirAnalysis* = tuple[allEntries: seq[ref FileEntry],
byRelPath: ref Table[string, ref FileEntry],
byChecksum: ref Table[string, seq[ref FileEntry]]]
# Pairs a progress indicator with the verbosity that decides whether it is used.
ProgressWrapper* = tuple[impl: Progress, verbosity: Verbosity]
# One flag per category of result that the command-line front end can print.
DisplayOptions = tuple[left, right, same, content, path: bool]
proc analyzeDir(root: string): DirAnalysis =
let fileCount = countFiles(root)
proc init(p: ProgressWrapper, root: string, fileCount: int): void =
  ## Announces the expanded `root` path (normal verbosity only) and sets the
  ## progress indicator's maximum to `fileCount` unless running very quiet.
  case p.verbosity
  of normal:
    echo "-- ", expandFilename(root)
    setMax(p.impl, fileCount)
  of quiet:
    setMax(p.impl, fileCount)
  of very_quiet:
    discard
proc update(p: ProgressWrapper, count: int, file: string): void =
  ## Reports progress for `file` unless running very quiet.
  ##
  ## `count` is passed through to the progress indicator together with a label
  ## made of at most the last 16 characters of `file`.
  if p.verbosity > very_quiet:
    # Paths shorter than 16 characters are shown whole; slicing from
    # `file.high - 15` would otherwise start at a negative index.
    let label =
      if file.len > 16: file[(file.len - 16)..file.high]
      else: file
    p.impl.updateProgress(count, label)
proc finish(p: ProgressWrapper): void =
  ## Erases the progress indicator and prints the file total it was configured
  ## with. Does nothing when running very quiet.
  if p.verbosity == very_quiet:
    return
  p.impl.erase
  echo " ", p.impl.getMax, " files.\L"
proc countFiles(root: string): int =
  ## Returns how many paths `walkDirRec` yields beneath `root`.
  # TODO
  for entry in walkDirRec(root):
    inc(result)
# NOTE(review): this span interleaves lines of two procs, `studyDir` and
# `getRelPath`. The lines that walk `root`, apply the `ignore` patterns, call
# `getFileInfo` and add to `result` appear to belong to `studyDir`; the lines
# that work on `ancestorPath` / `childPath` appear to belong to `getRelPath`.
# Confirm against the actual file before relying on the order shown here.
proc studyDir(root: string, ignore: Iterable[string]): TableRef[string, string] =
result = newTable[string, string]()
# getRelPath: expands both paths, splits them into components on either
# directory separator, and returns the child's path relative to the ancestor,
# prefixed with ".". Returns "" when the ancestor does not contain the child.
proc getRelPath(ancestor, child: string): string =
let ancestorPath = ancestor.expandFilename.split({DirSep, AltSep})
let childPath = child.expandFilename.split({DirSep, AltSep})
for path in walkDirRec(root):
var relPath = substr(path, len(root))
# If the ancestor path is longer it cannot contain the child path and we
# cannot construct a relative path without backtracking.
if (ancestorPath.len > childPath.len): return ""
if foldl(ignore, proc (acc: bool, it: string): bool = acc and match(relPath, re(it)), true): continue
# Compare the ancestor and child path up until the end of the ancestor path.
var idx = 0
while idx < ancestorPath.len and ancestorPath[idx] == childPath[idx]: idx += 1
var fileInfo = getFileInfo(path)
# If we stopped before reaching the end of the ancestor path it must be that
# the paths do not match. The ancestor cannot contain the child and we cannot
# build a relative path without backtracking.
if idx != ancestorPath.len: return ""
# Join "." with the child components that follow the shared prefix.
return foldl(@["."] & childPath[idx..childPath.high], joinPath(a, b))
# Files are recorded with the MD5 of their full contents; directories are
# recorded with the literal marker "directory".
if fileInfo.kind == pcFile:
result.add(relPath, $(toMD5(readFile(path))))
elif fileInfo.kind == pcDir:
result.add(relPath, "directory")
proc newProgressWrapper*(verbosity: Verbosity): ProgressWrapper =
  ## Builds a wrapper for `verbosity`. A progress indicator writing to stdout
  ## is created unless the verbosity is `very_quiet`, in which case `impl`
  ## stays nil.
  result.verbosity = verbosity
  if verbosity != very_quiet:
    result.impl = newProgress(stdout, 0)
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Walks every path under `root`, checksums it, and indexes the entries.
  ##
  ## The returned analysis holds each entry in `allEntries`, keyed by relative
  ## path in `byRelPath`, and grouped by checksum in `byChecksum`. Progress is
  ## reported through `progress` as each path is processed.
  let fileCount = countFiles(root)

  progress.init(root, fileCount)

  result = (allEntries: @[],
            byRelPath: newTable[string, ref FileEntry](),
            byChecksum: newTable[string, seq[ref FileEntry]]())

  var count = 0
  for file in walkDirRec(root):

    # Compute checksum
    let md5sum = fileToMd5(file)

    var fileEntry: ref FileEntry = new(ref FileEntry)
    fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum)

    # Add to allEntries list
    result.allEntries.add(fileEntry)

    # Add to byRelPath table
    result.byRelPath[fileEntry.relPath] = fileEntry

    # Add to the byChecksum table. The bucket must be looked up by checksum:
    # testing for the relative path here never matches, which would replace
    # the bucket on every file and drop earlier entries with the same checksum.
    if not result.byChecksum.hasKey(fileEntry.checksum):
      result.byChecksum[fileEntry.checksum] = newSeq[ref FileEntry]()

    result.byChecksum[fileEntry.checksum].add(fileEntry)

    progress.update(count, file)
    count += 1

  progress.finish()
proc loadAnalysis*(path: string, analysis: var DirAnalysis) =
  ## Reads a previously saved analysis from the file at `path` into `analysis`.
  ##
  ## Raises `IOError` when the file cannot be opened for reading.
  let inStream: Stream = newFileStream(path, fmRead)
  # newFileStream signals failure by returning nil rather than raising.
  if inStream.isNil:
    raise newException(IOError, path & ": unable to open file for reading.")
  try:
    load(inStream, analysis)
  finally:
    # Release the file handle even when unmarshalling fails.
    inStream.close()
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Writes `analysis` to the file at `path` using `marshal.store`.
  ##
  ## Raises `IOError` when the file cannot be opened for writing.
  let outStream = newFileStream(path, fmWrite)
  # newFileStream signals failure by returning nil rather than raising.
  if outStream.isNil:
    raise newException(IOError, path & ": unable to open file for writing.")
  try:
    store(outStream, analysis)
  finally:
    # Closing flushes the written data and releases the file handle.
    outStream.close()
proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Returns the entries of `left` for which `right` has an entry with the
  ## same relative path and the same checksum.
  result = @[]
  for item in left.allEntries:
    if right.byRelPath.hasKey(item.relPath) and
        right.byRelPath[item.relPath].checksum == item.checksum:
      result.add(item)
proc difference*(left, right: DirAnalysis): seq[ref FileEntry] =
  ## Returns the entries of `left` that `right` matches neither by relative
  ## path nor by checksum.
  result = @[]
  for item in left.allEntries:
    if right.byRelPath.hasKey(item.relPath): continue
    if right.byChecksum.hasKey(item.checksum): continue
    result.add(item)
proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator form of `intersection`.
  intersection(left, right)
proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} =
  ## Operator form of `difference`.
  difference(left, right)
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Returns the relative paths present on both sides whose checksums differ.
  result = @[]
  for item in left.allEntries:
    if not right.byRelPath.hasKey(item.relPath): continue
    if right.byRelPath[item.relPath].checksum != item.checksum:
      result.add(item.relPath)
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] =
  ## Pairs every entry of `left` with each entry of `right` that has the same
  ## checksum but a different relative path.
  result = @[]
  for leftEntry in left.allEntries:
    if right.byChecksum.hasKey(leftEntry.checksum):
      for rightEntry in right.byChecksum[leftEntry.checksum]:
        if leftEntry.relPath == rightEntry.relPath: continue
        result.add((left: leftEntry, right: rightEntry))
proc quitWithError(error: string): void =
  ## Writes `error`, prefixed with the program name, to stderr and exits with
  ## `QuitFailure`.
  writeLine(stderr, "treediff: ", error)
  quit(QuitFailure)
# Command-line front end: parses arguments with docopt, obtains a left and an
# optional right analysis, optionally saves them, and prints the selected
# categories of results.
when isMainModule:
let doc = """
treediff
Usage:
treediff [-i <regex>]... [<path>]...
treediff <left> [<right>] [options]
treediff (-h | --help)
treediff (-v | --version)
treediff (-V | --version)
<left> and <right> represent paths to directory roots to be compared. If one
of these paths points to a file instead of a directory, treediff assumes that
the file represents a saved directory analysis to be loaded in place of a
directory to compare. For example:
treediff /path/to/dir /path/to/output.json
will analyze the directory tree at '/path/to/dir' to create the left-side
analysis and load a pre-existing analysis from '/path/to/output.json' as the
right-side analysis.
Options:
-h --help Show this usage information.
-v --version Show the program version.
-V --version Show the program version.
-v --verbose Enable verbose output.
-q --quiet Suppress all output and error messages except for the
progress indicator.
-Q --very-quiet Suppress all output and error messages includeing the
progress indicator.
-1 --save-left <left_out> Save the left analysis to <left_out> (will be
formatted as JSON)
-2 --save-right <right_out> Save the right analysis to <right_out> (will be
formatted as JSON)
-s --same
-S --exclude-same
Show or hide information about files which are the same in both trees.
-c --content-mismatch
-C --exclude-content-mismatch
Show or hide information about files whose relative paths are the same
in both trees but whose contents differ.
-p --path-mismatch
-P --exclude-path-mismatch
Show or hide information about files whose contents are the same in both
trees but whose relative paths differ.
-l --left-only
-L --exclude-left-only
Show or hide information about files which are found only in the left
tree.
-r --right-only
-R --exclude-right-only
Show or hide information about files which are found only in the right
tree.
"""
# NOTE(review): `args` is bound twice in a row with different version strings,
# and the usage text above lists several usage and option lines in two
# variants. Presumably only one variant of each is intended; confirm.
let args = docopt(doc, version = "treediff 0.1")
let args = docopt(doc, version = "treediff v0.1")
# NOTE(review): this loop header and its echo read "<path>", which appears
# only in the first usage line above.
for root in @(args["<path>"]):
echo "Looking at ", root
# Verbosity defaults to normal. --very-quiet is checked after --quiet, so it
# takes precedence when both flags are given.
var verbosity = normal
if args["--quiet"]: verbosity = quiet
if args["--very-quiet"]: verbosity = very_quiet
let progressWrapper = newProgressWrapper(verbosity)
# Load or perform analysis
if not args["<left>"]:
quitWithError("Missing <left> parameter.")
let leftPath: string = $args["<left>"]
# loadPath exits with an error when `path` is neither an existing file nor an
# existing directory. Directories are analysed with analyzeDir; regular files
# are read as a saved analysis with loadAnalysis.
let loadPath = proc (path: string): DirAnalysis =
if not path.fileExists and not path.dirExists:
quitWithError($path & ": no such file or directory.")
let fileInfo = path.getFileInfo
if fileInfo.kind == pcDir:
return analyzeDir(path, progressWrapper)
elif fileInfo.kind == pcFile:
loadAnalysis(path, result)
else:
quitWithError($path & ": is not a file or directory")
var leftAnalysis, rightAnalysis: DirAnalysis
leftAnalysis = loadPath(leftPath)
# Without a <right> argument the right side is an empty analysis.
if not args["<right>"]:
rightAnalysis = (allEntries: @[],
byRelPath: newTable[string, ref FileEntry](),
byChecksum: newTable[string, seq[ref FileEntry]]())
else:
var rightPath: string = $args["<right>"]
rightAnalysis = loadPath(rightPath)
# Check for output options
if args["--save-left"]:
saveAnalysis($args["--save-left"], leftAnalysis)
# The right analysis is only saved when it has at least one entry.
if args["--save-right"] and rightAnalysis.allEntries.len > 0:
saveAnalysis($args["--save-right"], rightAnalysis)
# Parse filter options
var displayOptions: DisplayOptions = (
left: false, right: false, same: false, content: false, path: false)
# If none of the explicit selectors are given, assume all are expected.
if not (args["--left-only"] or args["--right-only"] or
args["--same"] or args["--content-mismatch"] or
args["--path-mismatch"] ):
displayOptions = (left: true, right: true, same: true,
content: true, path: true)
# Each include flag is applied before its matching exclude flag, so the
# exclude flag wins when both are present.
if args["--same"]: displayOptions.same = true
if args["--exclude-same"]: displayOptions.same = false
if args["--content-mismatch"]: displayOptions.content = true
if args["--exclude-content-mismatch"]: displayOptions.content = false
if args["--path-mismatch"]: displayOptions.path = true
if args["--exclude-path-mismatch"]: displayOptions.path = false
if args["--left-only"]: displayOptions.left = true
if args["--exclude-left-only"]: displayOptions.left = false
if args["--right-only"]: displayOptions.right = true
if args["--exclude-right-only"]: displayOptions.right = false
# Display output results
if verbosity == normal:
if displayOptions.same:
# `*` is the operator form of intersection.
let sameEntries = leftAnalysis * rightAnalysis
for fe in sameEntries: echo "same: ", fe.relPath
if displayOptions.content:
let contentsDiffer = samePathDifferentContents(leftAnalysis, rightAnalysis)
for path in contentsDiffer: echo "contents differ: ", path
if displayOptions.path:
let pathsDiffer = sameContentsDifferentPaths(leftAnalysis, rightAnalysis)
for pair in pathsDiffer:
echo "paths differ: ", pair.left.relPath, " ", pair.right.relPath
if displayOptions.left:
# `-` is the operator form of difference.
let leftOnly = leftAnalysis - rightAnalysis
for fe in leftOnly: echo "left only: ", fe.relPath
if displayOptions.right:
let rightOnly = rightAnalysis - leftAnalysis
for fe in rightOnly: echo "right only: ", fe.relPath
# NOTE(review): the two lines below call `studyDir` with `root` and "<regex>",
# names tied to the `for root in ...` loop header near the top of this block.
echo studyDir(root, @(args["<regex>"]))
echo ""

View File

@ -1,11 +1,9 @@
[Package]
name = "treeediff"
# Package
version = "0.1.0"
author = "Jonathan Bernard (jdb@jdb-labs.com)"
description = "Tree Diff"
description = "Utility to generate diffs of full directory trees."
license = "BSD"
bin = @["treediff"]
bin = "treediff"
[Deps]
Requires: "nim >= 0.10.0, docopt >= 0.1.0, iterutils >= 0.1.0"
# Dependencies
requires: @["nim >= 0.13.0", "docopt >= 0.1.0", "console_progress >= 1.2"]