Skip to content

Instantly share code, notes, and snippets.

@kylemcdonald
Created September 5, 2025 22:57
Show Gist options
  • Save kylemcdonald/9dba4732d6c913ac1bb955dec424ae7d to your computer and use it in GitHub Desktop.
Save kylemcdonald/9dba4732d6c913ac1bb955dec424ae7d to your computer and use it in GitHub Desktop.

Revisions

  1. kylemcdonald created this gist Sep 5, 2025.
    247 changes: 247 additions & 0 deletions file_checker.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,247 @@
    #!/usr/bin/env python3
    """
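    File Name Checker

    This script checks whether every file name found in the first directory
    (scanned recursively) also exists somewhere in at least one of the search
    directories, including their subdirectories. For any names that are
    missing, symbolic links to the source files are created in a 'missing'
    directory under the current working directory.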
    """

    import os
    import sys
    import argparse
    from pathlib import Path
    from typing import Set, List, Tuple, Dict
    from natsort import natsorted


    def get_all_files_in_directory(directory: str, ignore_extensions: Set[str] = None) -> Set[str]:
        """
        Collect the bare names of all files under a directory tree.

        Args:
            directory: Path to the directory to scan (subdirectories included).
            ignore_extensions: Extensions to skip, given in lower case with
                the leading dot (e.g. {".tmp"}). Matching is case insensitive
                because each file's extension is lower-cased before the lookup.
                ``None`` means nothing is ignored.

        Returns:
            Set of file names (without paths). A name that occurs in several
            subdirectories appears once. Directories that cannot be listed are
            reported on stdout and skipped, so a missing or unreadable
            top-level directory yields an empty set.
        """
        if ignore_extensions is None:
            ignore_extensions = set()

        def report_scan_error(error: OSError) -> None:
            # os.walk() ignores listing errors unless an onerror callback is
            # supplied, so a try/except around the loop never sees them.
            if isinstance(error, FileNotFoundError):
                print(f"Error: Directory '{error.filename}' not found.")
            elif isinstance(error, PermissionError):
                print(f"Error: Permission denied accessing directory '{error.filename}'.")
            else:
                print(f"Error: Could not scan directory '{error.filename}': {error}")

        file_names = set()
        for _root, _dirs, files in os.walk(directory, onerror=report_scan_error):
            for name in files:
                # splitext() keeps the leading dot, e.g. "a.TXT" -> ".TXT".
                if os.path.splitext(name)[1].lower() not in ignore_extensions:
                    file_names.add(name)

        return file_names


    def get_file_paths_in_directory(directory: str, ignore_extensions: Set[str] = None) -> Dict[str, str]:
        """
        Map every file name under a directory tree to a path where it lives.

        Args:
            directory: Path to the directory to scan (subdirectories included).
            ignore_extensions: Extensions to skip, given in lower case with
                the leading dot (e.g. {".tmp"}). Matching is case insensitive
                because each file's extension is lower-cased before the lookup.
                ``None`` means nothing is ignored.

        Returns:
            Dictionary mapping file names to paths built by joining the walked
            directory with the file name. When the same name occurs in several
            subdirectories, the path visited last wins. Directories that cannot
            be listed are reported on stdout and skipped, so a missing or
            unreadable top-level directory yields an empty dictionary.
        """
        if ignore_extensions is None:
            ignore_extensions = set()

        def report_scan_error(error: OSError) -> None:
            # os.walk() ignores listing errors unless an onerror callback is
            # supplied, so a try/except around the loop never sees them.
            if isinstance(error, FileNotFoundError):
                print(f"Error: Directory '{error.filename}' not found.")
            elif isinstance(error, PermissionError):
                print(f"Error: Permission denied accessing directory '{error.filename}'.")
            else:
                print(f"Error: Could not scan directory '{error.filename}': {error}")

        file_paths = {}
        for root, _dirs, files in os.walk(directory, onerror=report_scan_error):
            for name in files:
                # splitext() keeps the leading dot, e.g. "a.TXT" -> ".TXT".
                if os.path.splitext(name)[1].lower() not in ignore_extensions:
                    file_paths[name] = os.path.join(root, name)

        return file_paths


    def check_file_existence(source_files: Set[str], target_directories: List[str], ignore_extensions: Set[str] = None) -> Tuple[Set[str], Set[str]]:
        """
        Split source file names into those present in, and those absent from,
        the target directory trees.

        Args:
            source_files: Set of file names to look for
            target_directories: Directories to scan (subdirectories included)
            ignore_extensions: Extensions passed through to the directory scan
                so matching target files are left out of the comparison

        Returns:
            Tuple of (found_files, missing_files)
        """
        # Pool the names from every target tree before comparing.
        known_names = set()
        for candidate_dir in target_directories:
            known_names |= get_all_files_in_directory(candidate_dir, ignore_extensions)

        present = source_files & known_names
        absent = source_files - known_names
        return present, absent


    def create_soft_links_for_missing_files(files_to_find_dir: str, missing_files: Set[str], ignore_extensions: Set[str] = None) -> None:
        """
        Create soft links for missing files in a 'missing' subdirectory.

        The 'missing' directory is created relative to the current working
        directory if it does not exist yet. Progress and a summary are printed
        to stdout; failures are counted rather than raised.

        Args:
            files_to_find_dir: Directory containing the original files
            missing_files: Set of missing file names
            ignore_extensions: Set of file extensions to ignore (case insensitive)
        """
        # Get file paths for the source directory
        source_file_paths = get_file_paths_in_directory(files_to_find_dir, ignore_extensions)

        # Create missing directory
        missing_dir = Path("missing")
        missing_dir.mkdir(exist_ok=True)

        print(f"\nCreating soft links in '{missing_dir}' directory...")

        created_links = 0
        failed_links = 0

        # Sorted so the log output is stable from run to run.
        for file_name in sorted(missing_files):
            source_path = source_file_paths.get(file_name)
            if source_path is None:
                print(f" ✗ Could not find source path for {file_name}")
                failed_links += 1
                continue

            # A relative link target is resolved relative to the directory that
            # holds the link, not the working directory, so a relative source
            # path would produce a broken link. Link to the absolute path.
            target_path = os.path.abspath(source_path)
            link_path = missing_dir / file_name

            try:
                # exists() follows symlinks and is False for a dangling link,
                # so also check is_symlink() to clear stale links from earlier
                # runs before re-creating them.
                if link_path.is_symlink() or link_path.exists():
                    link_path.unlink()

                # Create soft link
                link_path.symlink_to(target_path)
                print(f" ✓ Created soft link: {file_name} -> {target_path}")
                created_links += 1
            except OSError as e:
                print(f" ✗ Failed to create soft link for {file_name}: {e}")
                failed_links += 1

        print("\nSoft link creation summary:")
        print(f" Created: {created_links}")
        print(f" Failed: {failed_links}")


    def parse_arguments():
        """
        Build the command line parser and parse the process arguments.

        Returns:
            The parsed arguments, with attributes ``files_to_find_dir``,
            ``search_dirs`` (one or more) and ``ignore_extensions`` (an empty
            list when the option is not given).
        """
        # Shown verbatim below the option list (RawDescriptionHelpFormatter).
        usage_examples = """
    Examples:
    python file_checker.py /path/to/files/to/find /path/to/search1 /path/to/search2
    python file_checker.py /path/to/files/to/find /path/to/search1 --ignore-extensions .tmp .log .bak
    python file_checker.py /path/to/files/to/find /path/to/search1 --ignore-extensions .TMP .LOG
    """

        cli = argparse.ArgumentParser(
            description="Check if all file names from the first directory exist somewhere in the search directories.",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog=usage_examples,
        )

        cli.add_argument("files_to_find_dir", help="Directory containing files to find")
        cli.add_argument(
            "search_dirs",
            nargs="+",
            help="Directories to search in (including subdirectories)",
        )
        cli.add_argument(
            "--ignore-extensions",
            nargs="+",
            default=[],
            help="File extensions to ignore (case insensitive, e.g., .tmp .log .bak)",
        )

        return cli.parse_args()


    def main():
        """
        Run the check from the command line.

        Parses the arguments, validates that every directory exists, compares
        the file names of the first directory against the search directories,
        and prints the result. Exits with status 0 when every name was found
        (or there was nothing to look for) and with status 1 when a directory
        is invalid or at least one name is missing; in the latter case soft
        links to the missing files are created first.
        """
        args = parse_arguments()

        files_to_find_dir = args.files_to_find_dir
        search_in_dirs = args.search_dirs
        # os.path.splitext() reports extensions with their leading dot, so an
        # argument such as "tmp" could never match; add the dot when it is
        # missing. An empty string is kept unchanged.
        ignore_extensions = {
            ext.lower() if not ext or ext.startswith(".") else f".{ext.lower()}"
            for ext in args.ignore_extensions
        }

        # Validate directories exist
        if not os.path.isdir(files_to_find_dir):
            print(f"Error: Directory containing files to find '{files_to_find_dir}' does not exist or is not a directory.")
            sys.exit(1)

        for search_dir in search_in_dirs:
            if not os.path.isdir(search_dir):
                print(f"Error: Directory to search in '{search_dir}' does not exist or is not a directory.")
                sys.exit(1)

        print(f"Scanning directory for files to find: {files_to_find_dir}")
        if ignore_extensions:
            print(f"Ignoring file extensions: {', '.join(sorted(ignore_extensions))}")

        source_files = get_all_files_in_directory(files_to_find_dir, ignore_extensions)

        if not source_files:
            print("No files found in source directory (after filtering by ignored extensions).")
            sys.exit(0)

        print(f"Found {len(source_files)} files in directory to search for.")
        print(f"Scanning directories to search in: {', '.join(search_in_dirs)}")

        found_files, missing_files = check_file_existence(source_files, search_in_dirs, ignore_extensions)

        print("\nResults:")
        print(f"Files found in search directory: {len(found_files)}/{len(source_files)}")
        print(f"Files missing from search directory: {len(missing_files)}")

        if found_files:
            print("\nFiles found in search directory:")
            for file in natsorted(found_files):
                print(f" ✓ {file}")

        if not missing_files:
            print("\n✅ All files from the first directory exist in at least one of the search directories.")
            sys.exit(0)

        print("\nFiles missing from search directory:")
        for file in natsorted(missing_files):
            print(f" ✗ {file}")

        # Create soft links for missing files
        create_soft_links_for_missing_files(files_to_find_dir, missing_files, ignore_extensions)

        print("\n❌ Not all files from the first directory exist in any of the search directories.")
        sys.exit(1)


    # Run the checker only when executed as a script, not when imported.
    if __name__ == "__main__":
        main()