#!/usr/local/bin/ruby
# Operational Options
# -doPrefix=(Y|N)
# Find maximal common prefix
# -csum=(md5|sha1)
# Set the checksum to use
# Printing/Display Options
# -sCOL=pattern
# Search criteria -- i.e. which file data lines to print. "COL"
# is the name of a column, or the keyword 'ALL'. When a column
# other than NAME is given, then it will be used to match the
# first part of the printed output for that column. When NAME
# is the column, it will be used as a regexp against the NAME
# col. When appearing multiple times, these options combine
# with "OR". The ALL keyword directs the script to display a
# line for every file -- even the ones that are the same on both
# sides. The default is to print a file if anything is different.
# Any option of this type replaces the default setting.
# -pColTitles=(Y|N)
# Print column titles. Default is 'Y'
# -pDups=(Y|N)
# Under each file line printed, also print the names of files
# with same content, but with different names.
# -pPrefix=(Y|N)
# If -doPrefix=Y, then print the maximal common prefix before
# the file data and column titles are printed.
# -pCols=col1,col2,...
# What cols to print. Note that "NAME" must be last if it is
# listed because it has a variable column width.
# -backup
# Set options so that the output is useful for backup programs:
# -pPrefix=N -pColTitles=N -pCols=NAME -sH=\| -sH=\>
# This is useful when one wishes to have a list of "new" files
# or files that have "changed" in the case when the first
# checksum file is assumed to be for the backup directory while
# the second is assumed to be the current working directory from
# which the backup was generated.
#
# Output:
#
# One file-name per line.
# * NL: Number of copies on left with same check-sum
# * NR: Number of copies on right with same check-sum
# * CS: Checksum (content) difference between current file and file on other side of same name:
# = same
# | different
# < name on left only (but if NR>0, then a copy exists on right with different name)
# > name on right only (but if NL>0, then a copy exists on left with different name)
# * ctime: Create time
# .. N/A -- file is missing on one side
# == same
# <U left older
# >U left newer
# Where U is one of: (s)econds, (h)ours, (d)ays, (w)eeks, (m)onths, (y)ears
# * mtime: Modify Time
# same notation as ctime
# * size: File Size
# .. N/A -- file is missing on one side
# == same
# <U left smaller
# >U left bigger
# Where U is one of: (B)ytes, (K)ilobytes, (M)egabytes, (G)igabytes
################################################################################################################################
# Process command line arguments.
#
# An argument is treated as an option only when the option name starts the
# argument (after one or more leading dashes).  Anything else is collected as
# a checksum file name; exactly two such names are required.
doPrefix = true    # Compute the maximal common path prefix of each file
pDups = false      # List same-content files under each printed line
pPrefix = true     # Print the prefixes before the report
pColTitles = true  # Print the column title row
pCols = ['NL', 'NR', 'H', 'CT', 'MT', 'SZ', 'HASH', 'NAME']
# printf width/flag fragment for each column ("%<fragment>s").
# NOTE(review): HASH uses -36 by default but -31 after an explicit -csum=md5;
# confirm which width is intended.
pColsFmt = { 'NL' => "-3",
             'NR' => "-3",
             'H' => "-1",
             'CT' => "-2",
             'MT' => "-2",
             'SZ' => "-2",
             'HASH' => "-36",
             'NAME' => 0 }
# Default search: print a file when any of these columns does not start with '='.
searchArg = [['H', '!='],
             ['CT', '!='],
             ['MT', '!='],
             ['SZ', '!=']]
searchArgD = true  # True while searchArg may still be replaced by the first -sCOL option
csumToRep = [0, 1]
csumType = 'md5'
csumFiles = Array.new
# All option patterns are anchored with \A so that a file name which merely
# contains an option-like substring (e.g. "old-backup.sums") is not mistaken
# for that option.
srchArgPat = Regexp.new('\A-+s(' + pCols.join('|') + ')=(.+)$')
ARGV.each do |curArg|
  if curArg.match(/\A-+backup/i)
    pPrefix = false
    pColTitles = false
    searchArg = [['H', '|'], ['H', '>']]
    pCols = ['NAME']
  elsif curArg.match(/\A-+sALL/)
    searchArg = nil
    searchArgD = true
  elsif (tmp = curArg.match(/\A-+pCols=(.+)/i))
    pCols = tmp[1].split(/\s*,\s*/)
  elsif (tmp = curArg.match(srchArgPat))
    # The first explicit search option discards the current search list.
    if searchArgD
      searchArg = Array.new
      searchArgD = false
    end
    searchArg.push([tmp[1], tmp[2]])
  elsif (tmp = curArg.match(/\A-+pColTitles=(Y|N)/i))
    pColTitles = (tmp[1].upcase == 'Y')
  elsif (tmp = curArg.match(/\A-+pDups=(Y|N)/i))
    pDups = (tmp[1].upcase == 'Y')
  elsif (tmp = curArg.match(/\A-+pPrefix=(Y|N)/i))
    pPrefix = (tmp[1].upcase == 'Y')
  elsif (tmp = curArg.match(/\A-+doPrefix=(Y|N)/i))
    doPrefix = (tmp[1].upcase == 'Y')
  elsif (tmp = curArg.match(/\A-+csum=(md5|sha1)/i))
    csumType = tmp[1].downcase
    pColsFmt['HASH'] = (csumType == 'md5' ? "-31" : "-40")
  else
    csumFiles.push(curArg)
    if curArg.match(/^-/)
      STDERR.puts("WARNING: Argument assumed to be file-name: #{curArg}!")
    end
  end
end
# Make sure the arguments are OK; a usage error ends the script with a
# non-zero exit status so that calling scripts can detect the failure.
if csumFiles.length > 2
  STDERR.puts("ERROR: too many checksum files to process: #{csumFiles.inspect}!")
  exit(1)
elsif csumFiles.length < 2
  STDERR.puts("ERROR: too few checksum files to process: #{csumFiles.inspect}!")
  exit(1)
end
################################################################################################################################
# Compute units on a delta
#
# Describes how a (left value) compares with b (right value) as a two
# character code:
#   '..'  either value is nil
#   '=='  the values are equal
#   '<U'  a is less than b, '>U' otherwise, where U labels the largest unit
#         whose size does not exceed the absolute difference
#   'ER'  no unit is small enough for the difference
# utype selects the unit table: 'time' (y, m, w, d, h, s) or 'size' (G, M, K, B).
def deltaUnits(a, b, utype)
  return '..' if a.nil? || b.nil?
  return '==' if a == b

  unitTable = {
    'time' => [[31536000, 'y'], [2592000, 'm'], [604800, 'w'], [86400, 'd'], [3600, 'h'], [1, 's']],
    'size' => [[1073741824, 'G'], [1048576, 'M'], [1024, 'K'], [1, 'B']]
  }
  # Tables are ordered largest unit first, so the first hit is the largest fit.
  hit = unitTable[utype].find { |unitSize, _label| (a - b).abs >= unitSize }
  return 'ER' if hit.nil?

  (a < b ? '<' : '>') + hit[1]
end
################################################################################################################################
# Read in the data files...
#
# fileInfo maps a table name to an array indexed by checksum file (0 = first
# file on the command line, 1 = second).  Except for PFIX each entry is a hash:
#   N2SN  file name => short name (the full name at this stage)
#   N2H   file name => checksum (the md5 or sha1 field, chosen by csumType)
#   N2CT  file name => ctime field as an integer
#   N2MT  file name => mtime field as an integer
#   N2SZ  file name => character-count field as an integer
#   H2N   checksum  => array of the file names having that checksum
# PFIX[i] is the leading string shared by every name read from file i; it is
# only maintained when doPrefix is set and stays nil if no line was read.
fileInfo = Hash.new
['N2SN', 'N2H', 'N2CT', 'N2MT', 'N2SZ', 'H2N', 'PFIX'].each do |key| # Create all the arrays
  fileInfo[key] = Array.new
end
csumFiles.each_with_index do |fileName, i|
  ['N2SN', 'N2H', 'N2CT', 'N2MT', 'N2SZ', 'H2N'].each do |key| # Init all the fileInfo members
    fileInfo[key][i] = Hash.new
  end
  # File.open, not Kernel#open: the latter treats a name beginning with "|" as
  # a command to run, and the name comes straight from the command line.
  File.open(fileName, 'r') do |file|
    file.each_line do |line|
      # Ten space-separated fields; the name comes last so it may contain spaces.
      _timeStamp, _atime, ctime, mtime, md5, sha1, _prtCharCnt, _lineCnt, charCnt, fname = line.chomp.split(/ /, 10)
      if fname.nil?
        # Blank or truncated line: there is no file name to record.
        STDERR.puts("WARNING: Skipping malformed line #{file.lineno} of #{fileName}!")
        next
      end
      if doPrefix # Find the maximal path-name prefix
        known = fileInfo['PFIX'][i]
        if known
          # Shrink the prefix to the part it has in common with this name.
          common = 0
          common += 1 while common < known.length && known[common] == fname[common]
          fileInfo['PFIX'][i] = known[0, common]
        else
          fileInfo['PFIX'][i] = fname
        end
      end
      csum = (csumType == 'md5' ? md5 : sha1)
      fileInfo['N2H'][i][fname] = csum # Store away the data...
      fileInfo['N2SN'][i][fname] = fname
      fileInfo['N2CT'][i][fname] = ctime.to_i
      fileInfo['N2MT'][i][fname] = mtime.to_i
      fileInfo['N2SZ'][i][fname] = charCnt.to_i
      (fileInfo['H2N'][i][csum] ||= Array.new).push(fname)
    end
  end
end
################################################################################################################################
# Post-process the data
# Compute short names if we found a prefix: each name in N2SN has the common
# prefix of its checksum file removed from its start.
if doPrefix
  fileInfo['N2H'].each_index do |i|
    prefix = fileInfo['PFIX'][i]
    # PFIX[i] is nil when the checksum file contributed no names at all.
    next if prefix.nil? || prefix.empty?
    # The prefix is literal path text, so it is escaped before being used in
    # a pattern, and \A pins the match to the very start of the name.
    prefixRe = Regexp.new('\A' + Regexp.escape(prefix))
    fileInfo['N2H'][i].keys.each do |fname|
      fileInfo['N2SN'][i][fname] = fname.sub(prefixRe, '')
    end
  end
end
################################################################################################################################
# Build up the search patterns from the inputs before we need them in the output stage below
#
# searchCriteria stays nil when searchArg is nil (print every file).  Otherwise
# it is a list of [column, matcher] pairs where the matcher is either
#   * a Regexp, for the NAME column, or
#   * [negate, text], for any other column: the column's printed value is
#     compared against text as a leading string, and a leading "!" in the
#     argument sets negate.
# A bad argument is reported on STDERR and ends the script with status 1.
searchCriteria = nil
unless searchArg.nil?
  searchCriteria = Array.new
  searchArg.each do |tag, pat|
    if tag.upcase == 'NAME'
      begin
        searchCriteria.push([tag, Regexp.new(pat)])
      rescue RegexpError => e
        # The pattern is user input; report it instead of dying with a backtrace.
        STDERR.puts("ERROR: Bad search argument: #{tag.inspect} => #{pat.inspect} (#{e.message})!")
        exit(1)
      end
    elsif (tmp = pat.match(/^(!{0,1})(.+)$/i))
      searchCriteria.push([tag, [(tmp[1] == '!'), tmp[2]]])
    else
      STDERR.puts("ERROR: Bad search argument: #{tag.inspect} => #{pat.inspect}!")
      exit(1)
    end
  end
end
################################################################################################################################
# Print out report
#
# Writes the optional prefix lines and title row, then walks the names of each
# checksum file in sorted order.  Every distinct short name gets at most one
# report line, printed only if it satisfies the search criteria.
if pPrefix && doPrefix
  puts("< Prefix: #{fileInfo['PFIX'][0].inspect}")
  puts("> Prefix: #{fileInfo['PFIX'][1].inspect}")
end
if pColTitles
  pCols.each { |title| printf("%#{pColsFmt[title]}s ", title) }
  printf("\n")
end
fnameSeenInBigList = Hash.new # Short names that already have a report line
fnameSeenInDupList = Hash.new # Short names already shown in some duplicate list
csumToRep.each do |i|
  j = (i - 1).abs # Index of the other file (0=>1, 1=>0)
  fileInfo['N2H'][i].keys.sort.each do |fnamei|
    shortName = fileInfo['N2SN'][i][fnamei]
    next if fnameSeenInBigList.member?(shortName)
    fnameSeenInBigList[shortName] = 1

    # Full name on each side: the other side's name is rebuilt from its prefix.
    otherPrefix = fileInfo['PFIX'][j]
    fnames = Array.new
    fnames[i] = fnamei
    fnames[j] = otherPrefix ? otherPrefix + shortName : fnamei

    inFile = [0, 1].map { |side| fileInfo['N2H'][side].member?(fnames[side]) } # What sides is file name on
    curHash = fileInfo['N2H'][i][fnames[i]] # Hash of current file
    numFnd = [0, 1].map { |side| (fileInfo['H2N'][side][curHash] || Array.new).length } # Times hash appears per side

    # '=' / '|' when the name is on both sides, '<' / '>' when on one side only.
    hashState =
      if inFile[0] && inFile[1]
        curHash == fileInfo['N2H'][j][fnames[j]] ? '=' : '|'
      else
        inFile[0] ? '<' : '>'
      end

    theCols = {
      'NL' => sprintf('%03d', numFnd[0]),
      'NR' => sprintf('%03d', numFnd[1]),
      'H' => hashState,
      'CT' => deltaUnits(fileInfo['N2CT'][0][fnames[0]], fileInfo['N2CT'][1][fnames[1]], 'time'),
      'MT' => deltaUnits(fileInfo['N2MT'][0][fnames[0]], fileInfo['N2MT'][1][fnames[1]], 'time'),
      'SZ' => deltaUnits(fileInfo['N2SZ'][0][fnames[0]], fileInfo['N2SZ'][1][fnames[1]], 'size'),
      'HASH' => fileInfo['N2H'][i][fnames[i]],
      'NAME' => shortName
    }

    # Evaluate search criteria: no criteria means print all, otherwise the
    # criteria are OR-ed and evaluation stops at the first one satisfied.
    printThisOne =
      if searchCriteria.nil?
        true
      else
        searchCriteria.any? do |tag, pat|
          if pat.is_a?(Regexp)
            theCols[tag].match(pat)
          else
            negate, leadText = pat
            leads = theCols[tag].start_with?(leadText)
            negate ? !leads : leads
          end
        end
      end
    next unless printThisOne

    pCols.each { |col| printf("%#{pColsFmt[col]}s ", theCols[col]) }
    printf("\n")
    next unless pDups

    # Collect the short names of all files with this content: '<' left only,
    # '>' right only, '=' when the same short name occurs on both sides.
    sameContentFileList = Hash.new
    [0, 1].each do |k|
      next unless fileInfo['H2N'][k].member?(curHash)
      fileInfo['H2N'][k][curHash].sort.each do |dfname|
        dshortName = fileInfo['N2SN'][k][dfname]
        sameContentFileList[dshortName] =
          if sameContentFileList.member?(dshortName)
            '='
          else
            k == 0 ? '<' : '>'
          end
      end
    end
    next unless sameContentFileList.keys.length > 1

    sameContentFileList.each do |dshortName, locs|
      next if fnameSeenInDupList.member?(dshortName)
      printf("%s %s %s\n", ' ' * 53, locs, dshortName)
      fnameSeenInDupList[dshortName] = 1
    end
  end
end
# Generated by GNU Enscript 1.6.5.2.