Change storage format of saved analysis.
This commit is contained in:
parent
de9ff2b54a
commit
ce821d8f53
@ -1,4 +1,4 @@
|
||||
## Version string of the command line tool.
const VERSION* = "2.0.0"
|
||||
|
||||
const USAGE* = """
|
||||
Usage:
|
||||
|
@ -1,5 +1,4 @@
|
||||
import md5, streams
|
||||
import os
|
||||
|
||||
proc fileToMD5*(filename: string) : string =
|
||||
|
||||
|
@ -3,7 +3,8 @@
|
||||
##
|
||||
## Utility to compare the file contents of two directory trees.
|
||||
|
||||
import os, tables, streams, sequtils, strutils, docopt, marshal
|
||||
import std/[json, jsonutils, os, tables, sequtils, strutils]
|
||||
import docopt
|
||||
import incremental_md5, console_progress
|
||||
|
||||
import ./cliconstants
|
||||
@ -69,94 +70,116 @@ proc getRelPath(ancestor, child: string): string =
|
||||
|
||||
|
||||
type
  FileEntry* = ref tuple[relPath: string, checksum: string]
    ## Data about one file that has been analyzed

  DirAnalysis* = ## Analysis data about one directory tree.
    tuple[allEntries: seq[FileEntry],
          byRelPath: TableRef[string, FileEntry],
          byChecksum: TableRef[string, seq[FileEntry]]]

  DisplayOptions = tuple[left, right, same, content, path: bool]
    ## Consolidated description of which types of results to display.
|
||||
|
||||
func `$`(f: FileEntry): string =
  ## Render an entry as its checksum, then ": ", then its relative path.
  result = f.checksum
  result.add(": ")
  result.add(f.relPath)
|
||||
|
||||
proc getOrFail(n: JsonNode, key: string, objName: string = ""): JsonNode =
  ## Convenience method to get a key from a JObject or raise an exception.
  ##
  ## `objName` is prefixed to the error message to identify the object
  ## being read. Raises `KeyError` when `n` is nil, is not a JSON object, or
  ## does not contain `key`.
  # Check the node kind explicitly: `hasKey` only asserts it, and assertions
  # can be compiled out.
  if n.isNil or n.kind != JObject or not n.hasKey(key):
    raise newException(KeyError, objName & " missing key '" & key & "'")
  n[key]
|
||||
|
||||
proc getIfExists(n: JsonNode, key: string): JsonNode =
  ## Fetch `key` from a JObject, falling back to a JSON null node when the
  ## key is absent.
  if not n.hasKey(key):
    return newJNull()
  n[key]
|
||||
|
||||
func parseFileEntry(n: JsonNode): FileEntry =
  ## Build a ``FileEntry`` from a JSON object carrying "relPath" and
  ## "checksum" keys; a missing key is reported by `getOrFail`.
  let
    path = n.getOrFail("relPath").getStr
    sum = n.getOrFail("checksum").getStr
  new(result)
  result[] = (relPath: path, checksum: sum)
|
||||
|
||||
func initDirAnalysis(): DirAnalysis =
  ## Create an empty analysis: no entries and two freshly allocated, empty
  ## lookup tables.
  result.allEntries = newSeq[FileEntry]()
  result.byRelPath = newTable[string, FileEntry]()
  result.byChecksum = newTable[string, seq[FileEntry]]()
|
||||
|
||||
func indexEntries(da: var DirAnalysis) =
  ## Fill the `byRelPath` and `byChecksum` lookup tables from `da.allEntries`.
  for entry in da.allEntries:
    da.byRelPath[entry.relPath] = entry
    # Create the bucket for this checksum on first sight, then append to it.
    da.byChecksum.mgetOrPut(entry.checksum, newSeq[FileEntry]()).add(entry)
|
||||
|
||||
proc analyzeDir*(root: string, progress: ProgressWrapper): DirAnalysis =
  ## Inspect a directory and analyze all files, noting their relative paths and
  ## checksum of their contents.
  ##
  ## Every file found by `walkDirRec(root)` is checksummed and recorded in
  ## `allEntries`; the lookup tables are then built in one pass by
  ## `indexEntries`. `progress` is initialised, updated once per file and
  ## finished before returning.
  let fileCount = countFiles(root)

  # The progress total is padded by 10 beyond the file count.
  # NOTE(review): presumably this leaves room for the indexing pass -- confirm.
  progress.init(root, fileCount + 10)

  result = initDirAnalysis()

  var count = 0
  for file in walkDirRec(root):
    # Compute checksum
    let md5sum = fileToMd5(file)

    let fileEntry = new(FileEntry)
    fileEntry[] = (relPath: getRelPath(root, file), checksum: md5sum)
    result.allEntries.add(fileEntry)

    progress.update(count, file)
    count += 1

  # Build byRelPath and byChecksum from allEntries (keyed by path and by
  # checksum respectively).
  result.indexEntries
  count += 10
  progress.finish()
|
||||
|
||||
proc loadAnalysis*(path: string): DirAnalysis =
  ## Load a previously performed directory analysis.
  ##
  ## The file at `path` is parsed as a JSON array of entry objects, each
  ## converted with `parseFileEntry`; the lookup tables are rebuilt with
  ## `indexEntries`. Raises `ValueError` when the top-level JSON value is not
  ## an array.
  let allEntriesJson = parseJson(readFile(path))
  # `items` only asserts the node kind, so reject other shapes explicitly.
  if allEntriesJson.kind != JArray:
    raise newException(ValueError,
      path & ": saved analysis is not a JSON array")

  result = initDirAnalysis()
  result.allEntries = toSeq(items(allEntriesJson)).map(parseFileEntry)
  result.indexEntries
|
||||
|
||||
proc saveAnalysis*(path: string, analysis: DirAnalysis): void =
  ## Save a completed analysis.
  ##
  ## Only `analysis.allEntries` is written, serialised to JSON with `toJson`;
  ## the lookup tables are not stored.
  writeFile(path, $(analysis.allEntries.toJson))
|
||||
|
||||
proc intersection*(left, right: DirAnalysis): seq[FileEntry] =
  ## Find all ``FileEntry`` that are the same on both sides: matching contents
  ## and paths.
  ##
  ## The returned entries are the ones from `left`.
  return left.allEntries.filter do (item: FileEntry) -> bool:
    # A path missing from the right side yields nil (the default for a ref),
    # so a single lookup covers both the "absent" and "nil" cases.
    let match = right.byRelPath.getOrDefault(item.relPath)
    return not match.isNil and item.checksum == match.checksum
|
||||
|
||||
proc difference*(left, right: DirAnalysis): seq[FileEntry] =
  ## Find all ``FileEntry`` that are present in the left but not present in
  ## the right.
  ##
  ## An entry counts as present in the right when either its relative path
  ## or its checksum is known there.
  return left.allEntries.filter do (item: FileEntry) -> bool:
    return not right.byRelPath.hasKey(item.relPath) and
           not right.byChecksum.hasKey(item.checksum)
|
||||
|
||||
proc `*`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
  ## Alias for `intersection(left, right) <#intersection>`_
  return intersection(left, right)
|
||||
|
||||
proc `-`*(left, right: DirAnalysis): seq[FileEntry] {.inline.} =
  ## Alias for `difference(left, right) <#difference>`_
  return difference(left, right)
|
||||
|
||||
proc samePathDifferentContents*(left, right: DirAnalysis): seq[string] =
  ## Find the relative paths of all ``FileEntry`` that have the same paths in
  ## both trees but whose contents differ.
  let matchingEntries = left.allEntries.filter do (item: FileEntry) -> bool:
    if not right.byRelPath.hasKey(item.relPath): return false
    let match = right.byRelPath[item.relPath]
    return item.checksum != match.checksum

  # Only the paths are reported, not the entries themselves.
  return matchingEntries.mapIt(it.relPath)
|
||||
|
||||
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: ref FileEntry]] =
|
||||
proc sameContentsDifferentPaths*(left, right: DirAnalysis): seq[tuple[left, right: FileEntry]] =
|
||||
## Find all ``FileEntry`` whose contents are the same in both trees but
|
||||
## which are located at different paths.
|
||||
result = @[]
|
||||
@ -192,7 +215,7 @@ when isMainModule:
|
||||
if fileInfo.kind == pcDir:
|
||||
return analyzeDir(path, progressWrapper)
|
||||
elif fileInfo.kind == pcFile:
|
||||
loadAnalysis(path, result)
|
||||
result = loadAnalysis(path)
|
||||
else:
|
||||
quitWithError($path & ": is not a file or directory")
|
||||
|
||||
@ -202,8 +225,8 @@ when isMainModule:
|
||||
|
||||
if not args["<right>"]:
|
||||
rightAnalysis = (allEntries: @[],
|
||||
byRelPath: newTable[string, ref FileEntry](),
|
||||
byChecksum: newTable[string, seq[ref FileEntry]]())
|
||||
byRelPath: newTable[string, FileEntry](),
|
||||
byChecksum: newTable[string, seq[FileEntry]]())
|
||||
else:
|
||||
var rightPath: string = $args["<right>"]
|
||||
rightAnalysis = loadPath(rightPath)
|
||||
|
@ -1,5 +1,5 @@
|
||||
# Package

version       = "2.0.0"
author        = "Jonathan Bernard (jdb@jdb-labs.com)"
description   = "Utility to generate diffs of full directory trees."
license       = "BSD"
|
||||
|
Loading…
x
Reference in New Issue
Block a user