Compare commits

...

2 Commits
1.4.6 ... main

Author SHA1 Message Date
9a73b7f248 Updates for Nim 2.0 2024-08-05 07:02:06 -05:00
ce821d8f53 Change storage format of saved analysis. 2024-02-14 12:20:20 -06:00
4 changed files with 68 additions and 46 deletions

View File

@ -1,4 +1,4 @@
const VERSION* = "1.4.6" const VERSION* = "2.0.1"
const USAGE* = """ const USAGE* = """
Usage: Usage:
@ -60,4 +60,4 @@ Options:
Show or hide information about files which are found only in the right Show or hide information about files which are found only in the right
tree. tree.
""" """

View File

@ -1,5 +1,5 @@
import md5, streams import std/streams
import os import checksums/md5
proc fileToMD5*(filename: string) : string = proc fileToMD5*(filename: string) : string =

View File

@ -3,7 +3,8 @@
## ##
## Utility to compare the file contents of two directory trees. ## Utility to compare the file contents of two directory trees.
import os, tables, streams, sequtils, strutils, docopt, marshal import std/[json, jsonutils, os, tables, sequtils, strutils]
import docopt
import incremental_md5, console_progress import incremental_md5, console_progress
import ./cliconstants import ./cliconstants
@ -69,94 +70,116 @@ proc getRelPath(ancestor, child: string): string =
type type
FileEntry* = tuple[relPath: string, checksum: string] FileEntry* = ref tuple[relPath: string, checksum: string]
## Data about one file that has been analyzed ## Data about one file that has been analyzed
DirAnalysis* = ## Analysis data about one directory tree. DirAnalysis* = ## Analysis data about one directory tree.
tuple[allEntries: seq[ref FileEntry], tuple[allEntries: seq[FileEntry],
byRelPath: ref Table[string, ref FileEntry], byRelPath: TableRef[string, FileEntry],
byChecksum: ref Table[string, seq[ref FileEntry]]] byChecksum: TableRef[string, seq[FileEntry]]]
DisplayOptions = tuple[left, right, same, content, path: bool] DisplayOptions = tuple[left, right, same, content, path: bool]
## Consolidated description of which types of results to display. ## Consolidated description of which types of results to display.
func `$`(f: FileEntry): string =
  ## Render an entry as its checksum, then a colon and a space, then its
  ## relative path.
  result = f.checksum
  result.add(": ")
  result.add(f.relPath)
proc getOrFail(n: JsonNode, key: string, objName: string = ""): JsonNode =
  ## Fetch the value stored under `key` in the JSON object `n`.
  ##
  ## `objName` only labels the error message; when it is empty the generic
  ## label "object" is used so the message never starts with a stray space.
  ##
  ## Raises `ValueError` when `n` is nil or is not a JSON object, and
  ## `KeyError` (a `ValueError` subtype) when `key` is absent. Both are
  ## catchable errors, so callers can handle malformed input without
  ## catching the bare `Exception` base type.
  let label = if objName.len > 0: objName else: "object"
  # std/json's `hasKey` asserts that the node is an object, so check the kind
  # first and report a catchable error instead.
  if n.isNil or n.kind != JObject:
    raise newException(ValueError,
      "cannot read key '" & key & "' from " & label & ": not a JSON object")
  if not n.hasKey(key):
    raise newException(KeyError, label & " missing key '" & key & "'")
  return n[key]
proc getIfExists(n: JsonNode, key: string): JsonNode =
  ## Look up `key` in the JSON object `n`, yielding a fresh JSON null node
  ## instead of raising when the key is absent.
  if n.hasKey(key):
    return n[key]
  return newJNull()
func parseFileEntry(n: JsonNode): FileEntry =
  ## Build a `FileEntry` from a JSON object, reading its `relPath` and
  ## `checksum` keys with `getStr`.
  ##
  ## A missing key makes `getOrFail` raise; the object name is passed along
  ## so the error message says what was being parsed instead of starting
  ## with an empty label.
  new(result)
  result.relPath = n.getOrFail("relPath", "FileEntry").getStr
  result.checksum = n.getOrFail("checksum", "FileEntry").getStr
func initDirAnalysis(): DirAnalysis =
  ## Create an empty analysis: no entries and two freshly allocated, empty
  ## lookup tables.
  result.allEntries = newSeq[FileEntry]()
  result.byRelPath = newTable[string, FileEntry]()
  result.byChecksum = newTable[string, seq[FileEntry]]()
func indexEntries(da: var DirAnalysis) =
  ## Fill the `byRelPath` and `byChecksum` lookup tables from `allEntries`.
  ##
  ## Each entry is stored under its relative path and appended to the list
  ## of entries sharing its checksum.
  for entry in da.allEntries:
    da.byRelPath[entry.relPath] = entry
    # Creates the per-checksum bucket on first sight, then appends to it.
    da.byChecksum.mgetOrPut(entry.checksum, newSeq[FileEntry]()).add(entry)
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis = proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
## Inspect a directory and analyze all files, noting their relative paths and ## Inspect a directory and analyze all files, noting their relative paths and
## checksum of their contents. ## checksum of their contents.
let fileCount = countFiles(root) let fileCount = countFiles(root)
progress.init(root, fileCount) progress.init(root, fileCount + 10)
result = (allEntries: @[], result = initDirAnalysis()
byRelPath: newTable[string, ref FileEntry](),
byChecksum: newTable[string, seq[ref FileEntry]]())
var count = 0 var count = 0
for file in walkDirRec(root): for file in walkDirRec(root):
# Compute checksum
let md5sum = fileToMd5(file) let md5sum = fileToMd5(file)
var fileEntry: ref FileEntry = new(ref FileEntry)
fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum )
# Add to allEntries list, byRelPath table, and byChecksum table var fileEntry: FileEntry = new(FileEntry)
fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum)
result.allEntries.add(fileEntry) result.allEntries.add(fileEntry)
result.byRelPath[fileEntry.relPath] = fileEntry
if not result.byChecksum.hasKey(fileEntry.relPath):
result.byChecksum[fileEntry.checksum] = newSeq[ref FileEntry]()
result.byChecksum[fileEntry.checksum].add(fileEntry)
progress.update(count, file) progress.update(count, file)
count += 1 count += 1
result.indexEntries
count += 10
progress.finish() progress.finish()
proc loadAnalysis*(path: string, analysis: var DirAnalysis) = proc loadAnalysis*(path: string): DirAnalysis =
## Load a previously performed directory analysis. ## Load a previously performed directory analysis.
let inStream: Stream = newFileStream(path, fmRead) let allEntriesJson = parseJson(readFile(path))
load(inStream, analysis) result = initDirAnalysis()
result.allEntries = toSeq(items(allEntriesJson)).map(parseFileEntry)
result.indexEntries
proc saveAnalysis*(path: string, analysis: DirAnalysis): void = proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
## Save a completed analysis. ## Save a completed analysis.
let outStream = newFileStream(path, fmWrite) writeFile(path, $(analysis.allEntries.toJson))
store(outStream, analysis)
proc intersection*(left, right: DirAnalysis): seq[ref FileEntry] = proc intersection*(left, right: DirAnalysis): seq[FileEntry] =
## Find all ``FileEntry`` that are the same on both sides: matching contents ## Find all ``FileEntry`` that are the same on both sides: matching contents
## and paths. ## and paths.
return left.allEntries.filter do (item: ref FileEntry) -> bool: return left.allEntries.filter do (item: FileEntry) -> bool:
if not right.byRelPath.hasKey(item.relPath): return false if not right.byRelPath.hasKey(item.relPath): return false
let match = right.byRelPath[item.relPath] let match = right.byRelPath[item.relPath]
if match == nil: return false
return item.checksum == match.checksum return item.checksum == match.checksum
proc difference*(left, right: DirAnalysis): seq[ref FileEntry] = proc difference*(left, right: DirAnalysis): seq[FileEntry] =
## Find all ``FileEntry`` that are present in the left but not present in ## Find all ``FileEntry`` that are present in the left but not present in
## the right. ## the right.
return left.allEntries.filter do (item: ref FileEntry) -> bool: return left.allEntries.filter do (item: FileEntry) -> bool:
return not right.byRelPath.hasKey(item.relPath) and return not right.byRelPath.hasKey(item.relPath) and
not right.byChecksum.hasKey(item.checksum) not right.byChecksum.hasKey(item.checksum)
proc `*`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} = proc `*`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
## Alias for `intersection(left, right) <#intersection>`_ ## Alias for `intersection(left, right) <#intersection>`_
return intersection(left, right) return intersection(left, right)
proc `-`*(left, right: DirAnalysis): seq[ref FileEntry] {.inline.} = proc `-`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
## Alias for `difference(left, right) <#difference>`_ ## Alias for `difference(left, right) <#difference>`_
return difference(left, right) return difference(left, right)
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] = proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
## Find all ``FileEntry`` that have the same paths in both trees but whose ## Find all ``FileEntry`` that have the same paths in both trees but whose
## contents differ. ## contents differ.
let matchingEntries = left.allEntries.filter do (item: ref FileEntry) -> bool: let matchingEntries = left.allEntries.filter do (item: FileEntry) -> bool:
if not right.byRelPath.hasKey(item.relPath): return false if not right.byRelPath.hasKey(item.relPath): return false
let match = right.byRelPath[item.relPath] let match = right.byRelPath[item.relPath]
return item.checksum != match.checksum return item.checksum != match.checksum
return matchingEntries.map(proc(item: ref FileEntry): string = return item.relPath) return matchingEntries.map(proc(item: FileEntry): string = return item.relPath)
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] = proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: FileEntry]] =
## Find all ``FileEntry`` whose contents are the same in both trees but ## Find all ``FileEntry`` whose contents are the same in both trees but
## which are located at different paths. ## which are located at different paths.
result = @[] result = @[]
@ -192,7 +215,7 @@ when isMainModule:
if fileInfo.kind == pcDir: if fileInfo.kind == pcDir:
return analyzeDir(path, progressWrapper) return analyzeDir(path, progressWrapper)
elif fileInfo.kind == pcFile: elif fileInfo.kind == pcFile:
loadAnalysis(path, result) result = loadAnalysis(path)
else: else:
quitWithError($path & ": is not a file or directory") quitWithError($path & ": is not a file or directory")
@ -202,8 +225,8 @@ when isMainModule:
if not args["<right>"]: if not args["<right>"]:
rightAnalysis = (allEntries: @[], rightAnalysis = (allEntries: @[],
byRelPath: newTable[string, ref FileEntry](), byRelPath: newTable[string, FileEntry](),
byChecksum: newTable[string, seq[ref FileEntry]]()) byChecksum: newTable[string, seq[FileEntry]]())
else: else:
var rightPath: string = $args["<right>"] var rightPath: string = $args["<right>"]
rightAnalysis = loadPath(rightPath) rightAnalysis = loadPath(rightPath)

View File

@ -1,5 +1,5 @@
# Package # Package
version = "1.4.6" version = "2.0.1"
author = "Jonathan Bernard (jdb@jdb-labs.com)" author = "Jonathan Bernard (jdb@jdb-labs.com)"
description = "Utility to generate diffs of full directory trees." description = "Utility to generate diffs of full directory trees."
license = "BSD" license = "BSD"
@ -7,11 +7,10 @@ bin = @["treediff"]
srcDir = "src/main/nim" srcDir = "src/main/nim"
# Dependencies # Dependencies
requires: @["nim >= 1.0.4", "docopt >= 0.6.8"] requires: @["nim >= 2.0.0", "docopt == 0.7.1", "checksums"]
# Dependencies from git.jdb-software.com/jdb/nim-packages # Dependencies from git.jdb-software.com/jdb/nim-packages
requires: @["console_progress >= 1.2.2"] requires: @["console_progress >= 1.2.2", "update_nim_package_version"]
requires "https://git.jdb-software.com/jdb/update-nim-package-version.git"
task updateVersion, "Update the version of this package.": task updateVersion, "Update the version of this package.":
exec "update_nim_package_version treediff 'src/main/nim/cliconstants.nim'" exec "update_nim_package_version treediff 'src/main/nim/cliconstants.nim'"