#!/usr/bin/env python3
'''
A script to recursively compare two directories (including file size and file hash changes)

Usage: python3 compare_dirs.py --base DIR1 --newer DIR2 [--size [BOOL]] [--md5 [BOOL]] [--color [BOOL]]
'''

import argparse
import hashlib
import os
import sys

# Spellings accepted as the value of a boolean command-line option.
_TRUE_WORDS = frozenset(('1', 'true', 't', 'yes', 'y', 'on'))
_FALSE_WORDS = frozenset(('', '0', 'false', 'f', 'no', 'n', 'off'))


def _parse_bool(value):
    '''Convert the textual value of a boolean option into a real bool.

    Without this conversion argparse stores the raw string, so
    ``--color False`` yields the truthy string 'False' and the option can
    never be switched off from the command line.

    value -- a bool (returned unchanged) or a string such as 'yes', 'off'
             or '1'; matching ignores case and surrounding whitespace.

    Returns True or False.
    Raises argparse.ArgumentTypeError for an unrecognised spelling, which
    argparse reports as an ordinary usage error.
    '''
    if isinstance(value, bool):
        return value
    word = value.strip().lower()
    if word in _TRUE_WORDS:
        return True
    if word in _FALSE_WORDS:
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value (yes/no), got {!r}'.format(value))


parser = argparse.ArgumentParser(
    description='''
A script to recursively compare two directories (including file size and file hash changes)
''',
    # argparse prints the "usage: " prefix itself, so it must not be repeated here.
    usage='python3 compare_dirs.py --base DIR1 --newer DIR2 '
          '[--size [BOOL]] [--md5 [BOOL]] [--color [BOOL]]',
)
# Each boolean option works as a bare flag ("--md5") and with an explicit
# value ("--md5 yes", "--color False"); nargs='?' + const=True provides both.
parser.add_argument(
    '--md5', nargs='?', const=True, default=False, type=_parse_bool, metavar='BOOL',
    help='compare MD5 hashes of same-named files whose sizes are equal; '
         'only takes effect together with --size (default: %(default)s)')
parser.add_argument(
    '--size', nargs='?', const=True, default=False, type=_parse_bool, metavar='BOOL',
    help='compare the sizes of files that have the same name (default: %(default)s)')
parser.add_argument(
    '--color', nargs='?', const=True, default=True, type=_parse_bool, metavar='BOOL',
    help='colorize the output with ANSI escape codes, Linux & Unix terminals only '
         '(default: %(default)s)')
parser.add_argument('--base', default='', help='base (old) folder path')
parser.add_argument('--newer', default='', help='newer folder path, compared against --base')

# Read the real command line only when executed as a script.  When the module
# is imported, fall back to the defaults instead of consuming (and possibly
# rejecting) the command line of the importing program.
if __name__ == '__main__':
    args = parser.parse_args()
else:
    args = parser.parse_args([])

COMPARE_FILES = args.size  # should file sizes be compared if their names are the same?
MD5 = args.md5             # should file hash sums be compared if their names and sizes are the same?
COLORIZE = args.color      # colorization for bash (only Linux & Unix)
base = args.base           # path of the base (old) directory
newer = args.newer         # path of the newer directory

#COMPARE_FILES = True # should file sizes be compared if their names are the same?
#MD5 = True # should file hash sums be compared if their names and sizes are the same?
#COLORIZE = False # colorization for bash (only Linux & Unix) def md5sum(fn): hasher = hashlib.md5() with open(fn, 'rb') as f: hasher.update(f.read()) return hasher.hexdigest() WHITE, RED, GREEN = 15, 196, 46 fg = lambda text, color: "\33[38;5;" + str(color) + "m" + text + "\33[0m" def compare_dirs(d1: "old directory name", d2: "new directory name"): def print_local(a, msg): text = '{} {} {}'.format('DIR ' if a[2] else 'FILE', a[1], msg) if COLORIZE: color = {'added': GREEN, 'removed': RED}.get(msg, WHITE) print(fg(text, color)) else: print(text) # Ensure validity for d in [d1,d2]: if not os.path.isdir(d): raise ValueError("not a directory: " + d) # Get relative path l1 = [(fn, os.path.join(d1, fn)) for fn in os.listdir(d1)] l2 = [(fn, os.path.join(d2, fn)) for fn in os.listdir(d2)] # Determine type: directory or file? l1 = sorted([(fn, pth, os.path.isdir(pth)) for fn, pth in l1]) l2 = sorted([(fn, pth, os.path.isdir(pth)) for fn, pth in l2]) i1 = i2 = 0 cnt = 0 common_dirs = [] while i1 remember the name for recursion common_dirs.append((l1[i1][1], l2[i2][1])) elif COMPARE_FILES: # Pair of files -> compare their sizes size1 = os.stat(l1[i1][1]).st_size size2 = os.stat(l2[i2][1]).st_size if size1!=size2: print_local(l1[i1],'size changed: {:d} -> {:d}'.format(size1, size2)) elif MD5: # Sizes are the same -> compare MD5 hashes if md5sum(l1[i1][1])!=md5sum(l2[i2][1]): print_local(l1[i1],'hash changed') else: print_local(l1[i1],'type changed') i1 += 1 i2 += 1 elif l1[i1][0]l2[i2][0]: print_local(l2[i2],'added') i2 += 1 cnt += 1 while i1