Skip to content

Instantly share code, notes, and snippets.

@nh2
Created January 23, 2022 07:18
Show Gist options
  • Save nh2/a4e8b6a764ac4d258dbb8a4daec5868b to your computer and use it in GitHub Desktop.
Save nh2/a4e8b6a764ac4d258dbb8a4daec5868b to your computer and use it in GitHub Desktop.

Revisions

  1. nh2 created this gist Jan 23, 2022.
    154 changes: 154 additions & 0 deletions mv-tree.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,154 @@
    #! /usr/bin/env python3

    # Moves contents of one directory tree into another, safely.

    import argparse
    import filecmp
    import shlex
    import sys

    from dataclasses import dataclass
    from pathlib import Path

    # pathlib follows symlinks by default in various functions, which is
    # dangerous for our use case; so we define wrapper functions.

    def really_exists(path: Path) -> bool:
        """Return True if `path` exists and is not itself a symlink.

        A symlink (dangling or not) never counts as "really existing".
        """
        if not path.exists():
            return False
        return not path.is_symlink()


    def really_does_not_exist(path: Path) -> bool:
        """Return True only if there is nothing at `path`, not even a symlink.

        A dangling symlink makes `path.exists()` false, so the symlink check
        is needed to detect that the name is nevertheless taken.
        """
        return not (path.exists() or path.is_symlink())


    def is_real_dir(path: Path) -> bool:
        """Return True if `path` is a directory and not a symlink to one."""
        if not path.is_dir():
            return False
        return not path.is_symlink()


    def is_real_file(path: Path) -> bool:
        """Return True if `path` is a regular file and not a symlink to one."""
        if not path.is_file():
            return False
        return not path.is_symlink()


    def quote_path(path: Path) -> str:
        """Return `path` as a single shell-escaped token for log output."""
        as_text = str(path)
        return shlex.quote(as_text)


    @dataclass
    class Settings:
        """Options controlling how a tree move is carried out."""

        # When set, actions are only printed; nothing is renamed or deleted.
        dry_run: bool = False
        # When set, files are compared with `filecmp.cmp(..., shallow=True)`
        # instead of a full content comparison.
        skip_file_contents_comparison: bool = False


    def move_trees_rec(
        source_dir: Path,
        target_dir: Path,
        settings: Settings,
    ) -> bool:
        """Move all dirents of `source_dir` into `target_dir` that can be moved
        conflict-free.

        Each entry of `source_dir` is handled as follows:

        * No entry of that name exists in `target_dir`: the entry is renamed
          into `target_dir`.
        * Both sides are real directories: recurse into them.
        * Both sides are symlinks: the source link is removed if both links
          have the same link target, otherwise it is a conflict.
        * Both sides are regular files: the source file is removed if
          `filecmp.cmp` reports them equal (shallow comparison when
          `settings.skip_file_contents_comparison` is set), otherwise it is a
          conflict.
        * Any other combination is a conflict.

        Symlinks are never followed. Every action is printed to stdout; with
        `settings.dry_run` nothing on disk is changed. If no conflict was
        found, `source_dir` itself is removed, provided it really is empty.

        NOTE: the "target does not exist" check and the rename are two
        separate steps, so a target created concurrently in between is not
        detected.

        Returns:
            True if all contents of `source_dir` could be moved (or, in a dry
            run, could be moved) so that it is now empty; False if any
            conflict remained or the directory turned out not to be empty.

        Raises:
            OSError: if a filesystem operation (listing, rename, unlink,
                rmdir) fails.
        """
        all_moved = True

        # Snapshot the listing before touching anything: entries are renamed
        # and unlinked below, and the directory should not be mutated while it
        # is still being read. Sorting makes the printed action log
        # reproducible between a dry run and the real run.
        for source in sorted(source_dir.iterdir()):
            target = target_dir / source.name

            if really_does_not_exist(target):
                print(f"mv {quote_path(source)} -> {quote_path(target)}")
                if not settings.dry_run:
                    source.rename(target)
                continue

            # Target exists; handle it depending on file type.

            if is_real_dir(source) and is_real_dir(target):
                # Both are directories: merge them recursively.
                print(f"recurse-dir {quote_path(source)}")
                if not move_trees_rec(source_dir=source, target_dir=target, settings=settings):
                    all_moved = False

            elif source.is_symlink() and target.is_symlink():
                # Symlinks count as moved if they point to the same path.
                print(f"compare-symlink {quote_path(source)} <-> {quote_path(target)}")
                if source.readlink() == target.readlink():
                    print(f"rm-equal-symlink {quote_path(source)}")
                    if not settings.dry_run:
                        source.unlink()
                else:
                    print(f"conflict-symlink {quote_path(source)} <-> {quote_path(target)}")
                    all_moved = False

            elif is_real_file(source) and is_real_file(target):
                # Regular files count as moved if they compare equal.
                compare_label = 'filemetadata' if settings.skip_file_contents_comparison else 'filecontents'
                print(f"compare-{compare_label} {quote_path(source)} <-> {quote_path(target)}")
                has_equal_contents = filecmp.cmp(
                    str(source),
                    str(target),
                    shallow=settings.skip_file_contents_comparison,
                )
                # Each pair is compared only once, so filecmp's result cache
                # is of no use here; drop it instead of letting it accumulate.
                filecmp.clear_cache()
                if has_equal_contents:
                    print(f"rm-equal-{compare_label} {quote_path(source)}")
                    if not settings.dry_run:
                        source.unlink()
                else:
                    print(f"conflict-{compare_label} {quote_path(source)} <-> {quote_path(target)}")
                    all_moved = False

            else:
                # Any other combination of file types is a conflict.
                print(f"conflict {quote_path(source)} <-> {quote_path(target)}")
                all_moved = False

        # Delete source dir if it's now empty.
        if all_moved:
            print(f"rmdir {quote_path(source_dir)}")
            if not settings.dry_run:
                # Re-check emptiness: something else may have added entries
                # to the directory while we were working on it.
                if not any(source_dir.iterdir()):
                    source_dir.rmdir()
                else:
                    print(f"Contents of source dir '{source_dir}' were modified, not performing rmdir", file=sys.stderr)
                    # The directory is not empty after all, so do not report
                    # it as fully moved to the caller.
                    all_moved = False

        return all_moved


    def move_trees(
        source_dir: Path,
        target_dir: Path,
        settings: Settings,
    ) -> None:
        """Validate both directories and move the contents of `source_dir`
        into `target_dir`.

        Both arguments must be existing directories that are not symlinks,
        and they must not refer to the same directory.

        Raises:
            SystemExit: with an explanatory message if validation fails.
        """
        if not is_real_dir(source_dir):
            sys.exit(f"Source dir '{source_dir}' must be an existing directory")
        if not is_real_dir(target_dir):
            sys.exit(f"Target dir '{target_dir}' must be an existing directory")
        # If both paths are the same directory, every file would compare equal
        # to itself and be deleted as a "duplicate" -- refuse instead of
        # destroying the tree.
        if source_dir.samefile(target_dir):
            sys.exit(f"Source dir '{source_dir}' and target dir '{target_dir}' must not be the same directory")

        move_trees_rec(source_dir=source_dir, target_dir=target_dir, settings=settings)


    def main(argv=None):
        """Command-line entry point: parse arguments and run the tree move.

        Args:
            argv: Argument strings to parse (without the program name). When
                None, argparse reads them from `sys.argv`.

        Raises:
            SystemExit: raised by argparse on `--help` or invalid arguments.
        """
        parser = argparse.ArgumentParser(
            description=(
                'Moves contents of one directory tree into another, safely. '
                'Only moves those contents that can be moved conflict-free.'
            ),
        )
        parser.add_argument('source', metavar='SOURCE', help='source directory tree')
        parser.add_argument('target', metavar='TARGET', help='target directory tree')
        parser.add_argument(
            '--dry-run',
            dest='dry_run',
            action='store_true',
            help='Perform a trial run with no changes made.',
        )
        parser.add_argument(
            '--skip-file-contents-comparison',
            dest='skip_file_contents_comparison',
            action='store_true',
            help='Compare only file size and modification time instead of contents when files are to be compared.',
        )

        args = parser.parse_args(argv)

        move_trees(
            source_dir=Path(args.source),
            target_dir=Path(args.target),
            settings=Settings(
                dry_run=args.dry_run,
                skip_file_contents_comparison=args.skip_file_contents_comparison,
            ),
        )


    # Run the command-line interface only when this file is executed as a
    # script, not when it is imported as a module.
    if __name__ == '__main__':
        main()