4 Commits
v1.0 ... v1.3

7 changed files with 426 additions and 190 deletions

View File

@ -3,7 +3,7 @@ apply plugin: "application"
apply plugin: "maven" apply plugin: "maven"
group = "com.jdblabs" group = "com.jdblabs"
version = "1.0" version = "1.3"
mainClassName = "com.jdblabs.file.treediff.TreeDiff" mainClassName = "com.jdblabs.file.treediff.TreeDiff"
repositories { repositories {
@ -12,10 +12,7 @@ repositories {
dependencies { dependencies {
compile 'org.codehaus.groovy:groovy-all:2.4.3' compile 'org.codehaus.groovy:groovy-all:2.4.3'
compile 'org.slf4j:slf4j-api:1.7.10' compile 'com.jdbernard:jdb-util:3.8'
compile 'ch.qos.logback:logback-core:1.1.2'
compile 'ch.qos.logback:logback-classic:1.1.2'
compile 'com.jdbernard:jdb-util:3.5'
compile 'commons-codec:commons-codec:1.10' compile 'commons-codec:commons-codec:1.10'
compile 'com.fasterxml.jackson.core:jackson-databind:2.4.4' compile 'com.fasterxml.jackson.core:jackson-databind:2.4.4'

View File

@ -1,185 +0,0 @@
package com.jdblabs.file.treediff
import groovy.io.FileType
import groovy.swing.SwingBuilder
import com.jdbernard.util.LightOptionParser
import org.apache.commons.codec.digest.DigestUtils
import com.fasterxml.jackson.databind.ObjectMapper
public class TreeDiff {
public static final String VERSION = "1.0";
private ObjectMapper objectMapper
/** #### `main`
  * Command-line entry point. Parses the options, answers the informational
  * switches, and then hands control to either the GUI or the console
  * front end. */
public static void main(String[] args) {

    /// Switches understood by the tool; only `-rd` consumes a value.
    def optionSpec = [
        'h': [longName: 'help'],
        'v': [longName: 'version'],
        'g': [longName: 'gui'],
        'i': [longName: 'analysis-in'],
        'o': [longName: 'analysis-out'],
        's': [longName: 'same'],
        'S': [longName: 'exclude-same'],
        'c': [longName: 'content-mismatch'],
        'C': [longName: 'exclude-content-mismatch'],
        'p': [longName: 'path-mismatch'],
        'P': [longName: 'exclude-path-mismatch'],
        'l': [longName: 'left-only'],
        'L': [longName: 'exclude-left-only'],
        'r': [longName: 'right-only'],
        'R': [longName: 'exclude-right-only'],
        'rd': [longName: 'directory', arguments: 1]
    ]

    def opts = LightOptionParser.parseOptions(optionSpec, args)

    /// Help is still a stub (TODO): it exits without printing anything.
    if (opts.h) return

    if (opts.v) {
        println "JDB Labs TreeDiff v${VERSION}"
        return
    }

    /// Pick the front end.
    if (opts.g) {
        gui(opts)
    } else {
        cli(opts)
    }
}
/** #### `cli`
  * Console front end: works out which result categories to print, resolves
  * and validates the two directories to compare, analyzes both, and prints
  * one line per finding. */
public static void cli(def opts) {

    /// If none of the explicit selectors are given, assume all are expected.
    boolean showAll = !(opts.s || opts.c || opts.p || opts.l || opts.r)

    /// A category is shown when it was selected (explicitly or by default)
    /// and has not been explicitly excluded.
    def show = [
        same:    (showAll || opts.s) && !opts.S,
        content: (showAll || opts.c) && !opts.C,
        path:    (showAll || opts.p) && !opts.P,
        left:    (showAll || opts.l) && !opts.L,
        right:   (showAll || opts.r) && !opts.R ]

    if (opts.args.size() < 2) {
        /* TODO: print usage */
        println "TreeDiff v${VERSION}: exactly two directory paths are required to compare."
        System.exit(1)
    }

    /// Relative arguments are resolved against `-rd` (default: the current
    /// directory); arguments starting with `/` are used as given.
    File rootDir = opts.rd ? new File(opts.rd[0] ?: '.') : new File('.')
    def locate = { path ->
        path.startsWith('/') ? new File(path) : new File(rootDir, path) }

    File leftDir = locate(opts.args[0])
    File rightDir = locate(opts.args[1])

    /// Validate the left side first, then the right; the first failure
    /// terminates the program.
    [[leftDir, opts.args[0]], [rightDir, opts.args[1]]].each { dir, given ->
        if (!(dir.exists() && dir.isDirectory())) {
            println "TreeDiff v${VERSION}: '${given}' cannot be found or is not a directory"
            System.exit(2)
        }
    }

    DirAnalysis left = analyzeDir(leftDir)
    DirAnalysis right = analyzeDir(rightDir)

    if (show.same) {
        for (entry in same(left, right)) {
            println "same: ${entry.relativePath}"
        }
    }

    if (show.content) {
        for (path in samePathDifferentContents(left, right)) {
            println "contents differ: $path"
        }
    }

    if (show.path) {
        for (pair in sameContentsDifferentPaths(left, right)) {
            println "paths differ: ${pair.first.relativePath} ${pair.second.relativePath}"
        }
    }

    if (show.left) {
        for (entry in firstSideOnly(left, right)) {
            println "left only: ${entry.relativePath}"
        }
    }

    if (show.right) {
        for (entry in firstSideOnly(right, left)) {
            println "right only: ${entry.relativePath}"
        }
    }
}
/** #### `gui`
  * Open the (currently empty) main window of the graphical front end and
  * return it.
  *
  * `frame` and `boxLayout` are `SwingBuilder` node names, so they must be
  * invoked on a builder instance; called bare from a static method they do
  * not resolve to anything. The window is built on the event dispatch
  * thread. `opts` is accepted for parity with `cli` but is not used yet. */
public static gui(def opts) {
    def swing = new SwingBuilder()
    def window = null

    swing.edt {
        window = frame(title: "TreeDiff v${VERSION}", show: true) {
            boxLayout()
        }
    }

    return window
}
/** #### `same`
  * Collect the entries from `left` for which `right` has an entry at the
  * same relative path with an equal checksum. */
public static List<FileEntry> same(DirAnalysis left, DirAnalysis right) {
    List<FileEntry> identical = []

    for (candidate in left.allEntries) {
        FileEntry counterpart = right.byRelativePath[candidate.relativePath]
        if (counterpart == null) continue
        if (candidate.checksum == counterpart.checksum) identical << candidate
    }

    return identical
}
/** #### `samePathDifferentContents`
  * Collect the relative paths that exist on both sides but whose checksums
  * differ. */
public static Set<String> samePathDifferentContents(DirAnalysis left, DirAnalysis right) {
    def changedPaths = []

    left.allEntries.each { entry ->
        FileEntry counterpart = right.byRelativePath[entry.relativePath]
        if (counterpart != null && entry.checksum != counterpart.checksum) {
            changedPaths << entry.relativePath
        }
    }

    /// The list is converted to the declared `Set` type on return.
    return changedPaths
}
/** #### `sameContentsDifferentPaths`
  * Pair every entry from `left` with each entry from `right` that has the
  * same checksum but a different relative path. The pairs are returned
  * sorted by the checksum of their left-hand entry. */
public static List<Tuple2<FileEntry, FileEntry> > sameContentsDifferentPaths(DirAnalysis left, DirAnalysis right) {
    def pairs = []

    for (l in left.allEntries) {
        List<FileEntry> candidates = right.byChecksum[l.checksum]

        /// No file on the right shares this content.
        if (!candidates) continue

        for (r in candidates) {
            if (l.relativePath != r.relativePath) {
                pairs << new Tuple2<FileEntry, FileEntry>(l, r)
            }
        }
    }

    return pairs.sort { it.first.checksum }
}
/** #### `firstSideOnly`
  * Collect the entries of `first` that `second` knows neither by relative
  * path nor by checksum. */
public static List<FileEntry> firstSideOnly(DirAnalysis first, DirAnalysis second) {
    List<FileEntry> exclusive = []

    for (entry in first.allEntries) {
        boolean knownPath = second.byRelativePath.containsKey(entry.relativePath)
        boolean knownContent = second.byChecksum.containsKey(entry.checksum)
        if (!(knownPath || knownContent)) exclusive << entry
    }

    return exclusive
}
/** #### `analyzeDir`
  * Walk every regular file below `root`, compute its MD5 checksum, and
  * record it in a new `DirAnalysis`: in the flat entry list, in the index
  * by relative path, and in the index by checksum. */
public static DirAnalysis analyzeDir(File root) {
    DirAnalysis result = new DirAnalysis()

    root.eachFileRecurse(FileType.FILES) { File current ->
        String relative = getRelativePath(root, current)
        String digest = current.withInputStream { stream -> DigestUtils.md5Hex(stream) }

        FileEntry entry = new FileEntry(
            file: current,
            relativePath: relative,
            checksum: digest)

        result.allEntries << entry
        result.byRelativePath[relative] = entry

        /// Several files may share one checksum, so that index maps to a
        /// list which is created on first use.
        if (!result.byChecksum.containsKey(digest)) {
            result.byChecksum[digest] = []
        }
        result.byChecksum[digest] << entry
    }

    return result
}
/** #### `getRelativePath`
  * Given a parent path and a child path, assuming the child path is
  * contained within the parent path, return the relative path from the
  * parent to the child (always `/`-separated and starting with `.`).
  * Returns the empty string when the child is not inside the parent. */
public static String getRelativePath(File parent, File child) {

    /// Split into path segments and drop the empty ones. Splitting the
    /// filesystem root ("/") yields no segments at all while every child
    /// path starts with an empty segment; keeping the empty segments would
    /// produce ".//name" for files directly below the root.
    def parentPath = parent.canonicalPath.split("[\\\\/]").findAll { it }
    def childPath = child.canonicalPath.split("[\\\\/]").findAll { it }

    /// If the parent path is longer it cannot contain the child path and
    /// we cannot construct a relative path without backtracking.
    if (parentPath.size() > childPath.size()) return ""

    /// Compare the parent and child path up until the end of the parent
    /// path.
    int b = 0
    while (b < parentPath.size() && parentPath[b] == childPath[b]) b++

    /// If we stopped before reaching the end of the parent path it must be
    /// that the paths do not match. The parent cannot contain the child and
    /// we cannot build a relative path without backtracking.
    if (b != parentPath.size()) return ""

    return (['.'] + childPath.drop(b)).join('/')
}
}

View File

@ -0,0 +1,33 @@
package com.jdblabs.file.treediff
import com.jdbernard.util.ConsoleProgressBar
/**
 * `ProgressListener` that reports progress on a console-like stream, using
 * a `ConsoleProgressBar` plus, in verbose mode, one line per file.
 */
public class ConsoleProgressListener implements ProgressListener {

    private ConsoleProgressBar consoleProgressBar
    private def out
    private boolean verbose

    /** Create a listener that writes to `out`. When `verbose` is set every
      * processed file is additionally listed by its canonical path. */
    public ConsoleProgressListener(def out, boolean verbose) {
        this.out = out
        this.verbose = verbose
        this.consoleProgressBar = new ConsoleProgressBar(out: out)
    }

    /** Print the header for `root` and draw the progress bar at zero. */
    @Override
    public void init(File root, int total) {
        out.println "-- ${root.canonicalPath}"
        out.println " $total files"

        consoleProgressBar.max = total
        consoleProgressBar.update(0, root.name)
    }

    /** Redraw the bar for `curFile`; in verbose mode the bar is erased
      * first so the file's canonical path can be printed on its own line. */
    @Override
    public void update(File curFile, int curCount) {
        if (verbose) {
            consoleProgressBar.erase()
            out.println " ${curFile.canonicalPath}"
        }

        consoleProgressBar.update(curCount, curFile.name)
    }

    /** Erase the bar and terminate the output with an empty line. */
    @Override
    public void finish() {
        consoleProgressBar.erase()
        out.println ""
    }
}

View File

@ -0,0 +1,9 @@
package com.jdblabs.file.treediff;
import java.io.File;
/**
 * Callback interface through which a directory scan reports its progress.
 */
public interface ProgressListener {
/**
 * Announces the start of a scan.
 *
 * @param root the directory about to be scanned
 * @param total the number of files found below {@code root}
 */
void init(File root, int total);
/**
 * Reports the file currently being processed.
 *
 * @param curFile the file being processed
 * @param curCount the running count of files processed, including {@code curFile}
 */
void update(File curFile, int curCount);
/**
 * Announces the end of the scan.
 */
void finish();
}

View File

@ -0,0 +1,382 @@
package com.jdblabs.file.treediff
import com.fasterxml.jackson.databind.ObjectMapper
import com.jdbernard.util.LightOptionParser
import groovy.io.FileType
import groovy.swing.SwingBuilder
import org.apache.commons.codec.digest.DigestUtils
public class TreeDiff {
public static final String VERSION = "1.3"
private ObjectMapper objectMapper = new ObjectMapper()
private PrintStream stdout
private PrintStream stderr
private File relativeRoot
private Map displayFilter
private boolean verbose
private boolean quiet
/** #### `main`
  * Command-line entry point: run a single diff on an instance wired to the
  * process's standard output and standard error streams. */
public static void main(String[] args) {
    new TreeDiff(stdout: System.out, stderr: System.err).doDiff(args)
}
/** #### `doDiff`
  * Run one TreeDiff invocation: parse `args`, build (or load) the analysis
  * of both sides, print the requested result categories, and optionally
  * save both analyses for later use with `--analysis-in`.
  *
  * Terminates the JVM with status 1 when fewer than two directories are
  * given and with status 2 when an input is missing or is not a
  * directory. */
public void doDiff(String[] args) {

    def cliDef = [
        'h': [longName: 'help'],
        'v': [longName: 'verbose'],
        'V': [longName: 'version'],
        'g': [longName: 'gui'],
        'i': [longName: 'analysis-in', arguments: 2],
        'o': [longName: 'analysis-out', arguments: 1],
        's': [longName: 'same'],
        'S': [longName: 'exclude-same'],
        'c': [longName: 'content-mismatch'],
        'C': [longName: 'exclude-content-mismatch'],
        'p': [longName: 'path-mismatch'],
        'P': [longName: 'exclude-path-mismatch'],
        'l': [longName: 'left-only'],
        'L': [longName: 'exclude-left-only'],
        'r': [longName: 'right-only'],
        'R': [longName: 'exclude-right-only'],
        'q': [longName: 'quiet'],
        'Q': [longName: 'very-quiet'],
        'rd': [longName: 'directory', arguments: 1]
    ]

    def opts = LightOptionParser.parseOptions(cliDef, args)

    /// Help goes through the injected stream, like every other message,
    /// rather than through the global `println`.
    if (opts.h) { stdout.println usage; return }

    if (opts.V) {
        stdout.println "JDB Labs TreeDiff v${VERSION}"
        return }

    verbose = opts.v

    if (opts.rd) relativeRoot = new File(opts.rd[0] ?: '.')
    else relativeRoot = new File('.')

    /// The GUI has no progress reporting yet (TODO), so a listener only
    /// exists for console runs.
    def progressListener = null
    if (!opts.g) progressListener = new ConsoleProgressListener(stdout, verbose)

    if (opts.q) quiet = true
    if (opts.Q) {
        quiet = true
        progressListener = null }

    DirAnalysis left, right
    File leftFile, rightFile

    if (opts.i) {
        /// Load both sides from previously saved analysis files.
        leftFile = resolvePath(opts.i[0], relativeRoot)
        rightFile = resolvePath(opts.i[1], relativeRoot)

        left = objectMapper.readValue(leftFile, DirAnalysis)
        right = objectMapper.readValue(rightFile, DirAnalysis) }

    else {
        if (opts.args.size() < 2) {
            /* TODO: print usage */
            stderr.println "TreeDiff v${VERSION}: exactly two directory paths are required to compare."
            System.exit(1) }

        leftFile = resolvePath(opts.args[0], relativeRoot)
        rightFile = resolvePath(opts.args[1], relativeRoot)

        if (!leftFile.isDirectory()) {
            stderr.println "TreeDiff v${VERSION}: '${opts.args[0]}' is not a directory"
            System.exit(2) }

        if (!rightFile.isDirectory()) {
            stderr.println "TreeDiff v${VERSION}: '${opts.args[1]}' is not a directory"
            System.exit(2) }

        left = analyzeDir(leftFile, progressListener)
        right = analyzeDir(rightFile, progressListener) }

    displayFilter = [ same: false, content: false, path: false,
                      left: false, right: false]

    /// If none of the explicit selectors are given, assume all are expected,
    /// unless a quiet mode was requested. `quiet` is set by both `-q` and
    /// `-Q`; testing only `opts.q` here would let `-Q` print every result.
    if (!opts.s && !opts.c && !opts.p && !opts.l && !opts.r && !quiet) {
        displayFilter = [ same: true, content: true, path: true,
                          left: true, right: true] }

    if (opts.s) displayFilter.same = true
    if (opts.S) displayFilter.same = false
    if (opts.c) displayFilter.content = true
    if (opts.C) displayFilter.content = false
    if (opts.p) displayFilter.path = true
    if (opts.P) displayFilter.path = false
    if (opts.l) displayFilter.left = true
    if (opts.L) displayFilter.left = false
    if (opts.r) displayFilter.right = true
    if (opts.R) displayFilter.right = false

    /// There is no graphical result view in this class yet, so `--gui` falls
    /// back to the console view instead of calling a method that does not
    /// exist.
    if (opts.g && !quiet) {
        stderr.println "TreeDiff v${VERSION}: the graphical interface is not yet implemented; showing results on the console." }

    displayResultsCli(left, right)

    if (opts.o) {
        /// Write `<root>.left` and `<root>.right`; a root name that does not
        /// start with `/` is placed relative to `relativeRoot`.
        String rootName = opts.o[0]
        boolean absolute = rootName.startsWith('/')

        File leftOut = absolute ?
            new File(rootName + '.left') :
            new File(relativeRoot, rootName + '.left')

        File rightOut = absolute ?
            new File(rootName + '.right') :
            new File(relativeRoot, rootName + '.right')

        objectMapper.writeValue(leftOut, left)
        objectMapper.writeValue(rightOut, right) }
}
/** #### `displayResultsCli`
  * Print, on `stdout`, one line per finding for every category enabled in
  * `displayFilter`. Categories are printed in a fixed order: same, contents
  * differ, paths differ, left only, right only. */
public void displayResultsCli(DirAnalysis left, DirAnalysis right) {

    if (displayFilter.same) {
        for (entry in same(left, right)) {
            stdout.println "same: ${entry.relativePath}"
        }
    }

    if (displayFilter.content) {
        for (path in samePathDifferentContents(left, right)) {
            stdout.println "contents differ: $path"
        }
    }

    if (displayFilter.path) {
        for (pair in sameContentsDifferentPaths(left, right)) {
            stdout.println "paths differ: ${pair.first.relativePath} ${pair.second.relativePath}"
        }
    }

    if (displayFilter.left) {
        for (entry in firstSideOnly(left, right)) {
            stdout.println "left only: ${entry.relativePath}"
        }
    }

    if (displayFilter.right) {
        for (entry in firstSideOnly(right, left)) {
            stdout.println "right only: ${entry.relativePath}"
        }
    }
}
/** #### `gui`
  * Open the (currently empty) main window of the graphical front end and
  * return it.
  *
  * `frame` and `boxLayout` are `SwingBuilder` node names, so they must be
  * invoked on a builder instance; called bare from a static method they do
  * not resolve to anything. The window is built on the event dispatch
  * thread. `opts` is accepted but not used yet. */
public static gui(def opts) {
    def swing = new SwingBuilder()
    def window = null

    swing.edt {
        window = frame(title: "TreeDiff v${VERSION}", show: true) {
            boxLayout()
        }
    }

    return window
}
/** #### `same`
  * Select the entries from `left` for which `right` has an entry at the
  * same relative path with an equal checksum. */
public static List<FileEntry> same(DirAnalysis left, DirAnalysis right) {
    def rightByPath = right.byRelativePath

    /// True when the right side holds the same path with the same content.
    def unchanged = { entry ->
        FileEntry other = rightByPath[entry.relativePath]
        other == null ? false : entry.checksum == other.checksum }

    return left.allEntries.findAll(unchanged)
}
/** #### `samePathDifferentContents`
  * Select the relative paths that exist on both sides but whose checksums
  * differ. */
public static Set<String> samePathDifferentContents(DirAnalysis left, DirAnalysis right) {
    def rightByPath = right.byRelativePath

    /// True when the right side holds the same path with other content.
    def modified = { entry ->
        FileEntry other = rightByPath[entry.relativePath]
        other == null ? false : entry.checksum != other.checksum }

    /// The resulting list is converted to the declared `Set` type on return.
    return left.allEntries.findAll(modified)*.relativePath
}
/** #### `sameContentsDifferentPaths`
  * Pair every entry from `left` with each entry from `right` that has the
  * same checksum but a different relative path. The pairs are returned
  * sorted by the checksum of their left-hand entry. */
public static List<Tuple2<FileEntry, FileEntry> > sameContentsDifferentPaths(DirAnalysis left, DirAnalysis right) {
    def pairs = left.allEntries.collectMany { l ->
        /// A missing (or empty) checksum bucket contributes no pairs.
        List<FileEntry> sameContent = right.byChecksum[l.checksum] ?: []

        sameContent
            .findAll { r -> l.relativePath != r.relativePath }
            .collect { r -> new Tuple2<FileEntry, FileEntry>(l, r) }
    }

    return pairs.sort { it.first.checksum }
}
/** #### `firstSideOnly`
  * Select the entries of `first` that `second` knows neither by relative
  * path nor by checksum. */
public static List<FileEntry> firstSideOnly(DirAnalysis first, DirAnalysis second) {
    def otherPaths = second.byRelativePath
    def otherChecksums = second.byChecksum

    return first.allEntries.findAll { entry ->
        !(otherPaths.containsKey(entry.relativePath) ||
          otherChecksums.containsKey(entry.checksum))
    }
}
/** #### `analyzeDir`
  * Walk every regular file below `root`, compute its MD5 checksum, and
  * record it in a new `DirAnalysis`. When a `progressListener` is given it
  * is told about the start and end of the scan; per-file updates are only
  * sent in verbose mode or when more than 100 files were found. */
public DirAnalysis analyzeDir(File root, ProgressListener progressListener) {
    DirAnalysis analysis = new DirAnalysis()

    /// First pass: count the files so progress can be reported against a
    /// known total.
    int totalNumFiles = 0
    root.eachFileRecurse(FileType.FILES) { totalNumFiles++ }

    boolean showProgress = false
    if (progressListener != null) {
        showProgress = verbose || totalNumFiles > 100
    }

    if (progressListener) progressListener.init(root, totalNumFiles)

    /// Second pass: hash and index every file.
    int filesProcessed = 0
    root.eachFileRecurse(FileType.FILES) { file ->

        if (showProgress) {
            filesProcessed += 1
            progressListener.update(file, filesProcessed)
        }

        /// A file that cannot be read is still recorded, with an empty
        /// checksum. NOTE(review): all such files therefore appear to share
        /// one checksum; confirm that this is intended.
        String checksum
        try {
            checksum = file.withInputStream { stream -> DigestUtils.md5Hex(stream) }
        }
        catch (Exception e) {
            checksum = ""
            if (!quiet) {
                stderr.println "Unable to process file: ${file.canonicalPath}"
                stderr.println " details: ${e.getLocalizedMessage()}"
            }
        }

        FileEntry entry = new FileEntry(
            file: file,
            relativePath: getRelativePath(root, file),
            checksum: checksum)

        analysis.allEntries << entry
        analysis.byRelativePath[entry.relativePath] = entry

        /// Several files may share one checksum, so that index maps to a
        /// list which is created on first use.
        if (!analysis.byChecksum.containsKey(entry.checksum)) {
            analysis.byChecksum[entry.checksum] = []
        }
        analysis.byChecksum[entry.checksum] << entry
    }

    if (progressListener) progressListener.finish()

    return analysis
}
/** #### `getRelativePath`
  * Given a parent path and a child path, assuming the child path is
  * contained within the parent path, return the relative path from the
  * parent to the child (always `/`-separated and starting with `.`).
  * Returns the empty string when the child is not inside the parent. */
public static String getRelativePath(File parent, File child) {

    /// Split into path segments and drop the empty ones. Splitting the
    /// filesystem root ("/") yields no segments at all while every child
    /// path starts with an empty segment; keeping the empty segments would
    /// produce ".//name" for files directly below the root.
    def parentPath = parent.canonicalPath.split("[\\\\/]").findAll { it }
    def childPath = child.canonicalPath.split("[\\\\/]").findAll { it }

    /// If the parent path is longer it cannot contain the child path and
    /// we cannot construct a relative path without backtracking.
    if (parentPath.size() > childPath.size()) return ""

    /// Compare the parent and child path up until the end of the parent
    /// path.
    int b = 0
    while (b < parentPath.size() && parentPath[b] == childPath[b]) b++

    /// If we stopped before reaching the end of the parent path it must be
    /// that the paths do not match. The parent cannot contain the child and
    /// we cannot build a relative path without backtracking.
    if (b != parentPath.size()) return ""

    return (['.'] + childPath.drop(b)).join('/')
}
/** #### `resolvePath`
  * Turn `path` into a `File`. Absolute paths are used as given; anything
  * else is resolved against `rootDir`. If the resulting file does not
  * exist an error is written to `stderr` and the JVM exits with status 2. */
public File resolvePath(String path, File rootDir) {
    File f

    /// A leading `/` is always treated as absolute; `isAbsolute()` also
    /// recognizes platform-specific forms such as drive-letter paths.
    if (path.startsWith('/') || new File(path).isAbsolute()) f = new File(path)
    else f = new File(rootDir, path)

    if (!f.exists()) {
        stderr.println "TreeDiff v${VERSION}: '${f.canonicalPath}' cannot be found"
        System.exit(2) }

    return f
}
/** Write `msg` to `stdout`, but only in verbose mode. */
private void verboseOut(String msg) {
    if (!verbose) return
    stdout.println msg
}
/** Write `msg` to `stderr`, but only in verbose mode. */
private void verboseErr(String msg) {
    if (!verbose) return
    stderr.println msg
}
/** #### `getUsage`
  * Build the help text shown for `-h`/`--help`. The option names listed
  * here must match the ones defined for the option parser in `doDiff`
  * (for example `-C` for `--exclude-content-mismatch` and `--directory`
  * for `-rd`). */
public String getUsage() {
return """\
JDB Labs TreeDiff v${VERSION}
Gather and display information about the differences between two file trees,
including files found in only one side and not the other, files that match on
both sides, files which share the same contents but reside in differing paths
on each side, and files that reside at the same location on both sides but
whose contents differ.
usage: treediff [options] <left-directory> <right-directory>
where options are:
-h, --help Output this usage information.
-v, --verbose Enable verbose output.
-V, --version Output the version information for the utility.
-g, --gui Launch the graphical interface (not yet implemented).
-s, --same
Output information about files that are the same on both sides.
-S, --exclude-same
Do not output information about files that are the same on both sides.
-c, --content-mismatch
Output information about files that have the same relative path on both
side but whose contents differ.
-C, --exclude-content-mismatch
Do not output information about files that have the same relative path
on both side but whose contents differ.
-p, --path-mismatch
Output information about files that have the same content but reside at
different relative paths on each side.
-P, --exclude-path-mismatch
Do not output information about files that have the same content but
reside at different relative paths on each side.
-l, --left-only
Output information about files found on only the left side (missing
from the right entirely).
-L, --exclude-left-only
Do not output information about files found on the left side only
(missing from the right entirely).
-r, --right-only
Output information about files found on only the right side (missing
from the left entirely).
-R, --exclude-right-only
Do not output information about files found on the right side only
(missing from the left entirely).
-q, --quiet
Suppress all output and error messages except for the progress
indicator.
-Q, --very-quiet
Suppress all output and error messages including the progress
indicator.
-rd, --directory <directory-path>
Use <directory-path> as the root for all relative file paths (input
directories to scan for example).
-i, --analysis-in <left-dir-analysis> <right-dir-analysis>
Use pre-calculated directory analysis in place of reading local
directories. This is useful if you wish to do diffs between two
directory trees that are not on the same filesystem, or if you wish to
display different output about a diff without re-scanning the
filesystem.
-o, --analysis-out <file-name-root>
In addition to the requested output on STDOUT, write the analysis for
each of the scanned directories to files named <file-name-root>.left
and <file-name-root>.right. These analysis files are formatted so that
they can be used as inputs to the --analysis-in option.
""";
}
}