Created
September 5, 2025 22:57
-
-
Save kylemcdonald/9dba4732d6c913ac1bb955dec424ae7d to your computer and use it in GitHub Desktop.
Revisions
-
kylemcdonald created this gist
Sep 5, 2025. There are no files selected for viewing
#!/usr/bin/env python3
"""
File Name Checker

This script checks if all file names from the first directory exist somewhere
in the search directories, including subdirectories recursively. For every
file name that cannot be found, a soft link pointing at the original file is
created in a 'missing' directory.
"""

import argparse
import os
import re
import sys
from pathlib import Path
from typing import Dict, Iterator, List, Optional, Set, Tuple

try:
    from natsort import natsorted
except ImportError:
    # natsort is only used to order the report, so fall back to a small
    # standard-library natural sort instead of refusing to run without it.
    _DIGIT_RUNS = re.compile(r"(\d+)")

    def _natural_key(name: str) -> list:
        """Split *name* into text and integer chunks for natural ordering."""
        # re.split with one capture group alternates text / digits, so the
        # odd positions are always the digit runs.
        parts = _DIGIT_RUNS.split(name)
        return [int(part) if index % 2 else part
                for index, part in enumerate(parts)]

    def natsorted(names):
        """Return *names* as a list in an approximation of natsort's order."""
        return sorted(names, key=_natural_key)


def _normalize_extensions(ignore_extensions: Optional[Set[str]]) -> Set[str]:
    """Lower-case the extensions and add a leading dot where it is missing."""
    normalized = set()
    for ext in ignore_extensions or ():
        ext = ext.lower()
        # os.path.splitext yields '' or '.ext', so 'tmp' could never match
        # without the dot. '' is kept as-is: it matches extensionless files.
        if ext and not ext.startswith("."):
            ext = "." + ext
        normalized.add(ext)
    return normalized


def _report_walk_error(error: OSError) -> None:
    """Print a message for a directory that os.walk could not list."""
    if isinstance(error, FileNotFoundError):
        print(f"Error: Directory '{error.filename}' not found.")
    elif isinstance(error, PermissionError):
        print(f"Error: Permission denied accessing directory '{error.filename}'.")
    else:
        print(f"Error: Could not read directory '{error.filename}': {error.strerror}")


def _iter_files(directory: str,
                ignore_extensions: Optional[Set[str]] = None
                ) -> Iterator[Tuple[str, str]]:
    """Yield (file name, full path) for every non-ignored file under *directory*."""
    ignored = _normalize_extensions(ignore_extensions)
    # os.walk silently ignores listing errors unless onerror is given; report
    # them so an unreadable directory does not quietly skew the result.
    for root, _dirs, files in os.walk(directory, onerror=_report_walk_error):
        for name in files:
            if os.path.splitext(name)[1].lower() not in ignored:
                yield name, os.path.join(root, name)


def get_all_files_in_directory(directory: str,
                               ignore_extensions: Optional[Set[str]] = None
                               ) -> Set[str]:
    """
    Get all file names (without paths) from a directory and its subdirectories.

    Args:
        directory: Path to the directory to scan
        ignore_extensions: Set of file extensions to ignore (case insensitive,
            leading dot optional)

    Returns:
        Set of file names (without paths). Empty if the directory cannot be
        read; in that case an error message is printed.
    """
    return {name for name, _path in _iter_files(directory, ignore_extensions)}


def get_file_paths_in_directory(directory: str,
                                ignore_extensions: Optional[Set[str]] = None
                                ) -> Dict[str, str]:
    """
    Get all file names and their full paths from a directory and its subdirectories.

    Args:
        directory: Path to the directory to scan
        ignore_extensions: Set of file extensions to ignore (case insensitive,
            leading dot optional)

    Returns:
        Dictionary mapping file names to their full paths. When the same name
        occurs in several subdirectories, the path visited last is kept.
    """
    return dict(_iter_files(directory, ignore_extensions))


def check_file_existence(source_files: Set[str],
                         target_directories: List[str],
                         ignore_extensions: Optional[Set[str]] = None
                         ) -> Tuple[Set[str], Set[str]]:
    """
    Check which files from source_files exist in any of the target_directories.

    Args:
        source_files: Set of file names to check
        target_directories: List of directories to search in (including subdirectories)
        ignore_extensions: Set of file extensions to ignore (case insensitive)

    Returns:
        Tuple of (found_files, missing_files)
    """
    wanted = set(source_files)
    all_target_files: Set[str] = set()
    for target_directory in target_directories:
        all_target_files |= get_all_files_in_directory(target_directory, ignore_extensions)

    return wanted & all_target_files, wanted - all_target_files


def create_soft_links_for_missing_files(files_to_find_dir: str,
                                        missing_files: Set[str],
                                        ignore_extensions: Optional[Set[str]] = None,
                                        missing_dir: str = "missing") -> None:
    """
    Create soft links for missing files in a 'missing' subdirectory.

    Args:
        files_to_find_dir: Directory containing the original files
        missing_files: Set of missing file names
        ignore_extensions: Set of file extensions to ignore (case insensitive)
        missing_dir: Directory that receives the links; created if needed.
            Defaults to 'missing' relative to the current working directory.
    """
    # Get file paths for the source directory
    source_file_paths = get_file_paths_in_directory(files_to_find_dir, ignore_extensions)

    # Create missing directory
    links_dir = Path(missing_dir)
    links_dir.mkdir(parents=True, exist_ok=True)

    print(f"\nCreating soft links in '{links_dir}' directory...")

    created_links = 0
    failed_links = 0

    for file_name in natsorted(missing_files):
        source_path = source_file_paths.get(file_name)
        if source_path is None:
            print(f"  ✗ Could not find source path for {file_name}")
            failed_links += 1
            continue

        # A symlink target is resolved relative to the link's own directory,
        # so a relative source path would produce a broken link.
        target = os.path.abspath(source_path)
        link_path = links_dir / file_name
        try:
            # exists() follows symlinks and is False for a dangling link, so
            # test is_symlink() too or symlink_to() fails with FileExistsError.
            if link_path.is_symlink() or link_path.exists():
                link_path.unlink()
            link_path.symlink_to(target)
        except OSError as e:
            print(f"  ✗ Failed to create soft link for {file_name}: {e}")
            failed_links += 1
        else:
            print(f"  ✓ Created soft link: {file_name} -> {target}")
            created_links += 1

    print("\nSoft link creation summary:")
    print(f"  Created: {created_links}")
    print(f"  Failed: {failed_links}")


def parse_arguments(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Parse command line arguments.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:] when None.

    Returns:
        Namespace with files_to_find_dir, search_dirs and ignore_extensions.
    """
    parser = argparse.ArgumentParser(
        description="Check if all file names from the first directory exist somewhere in the search directories.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python file_checker.py /path/to/files/to/find /path/to/search1 /path/to/search2
  python file_checker.py /path/to/files/to/find /path/to/search1 --ignore-extensions .tmp .log .bak
  python file_checker.py /path/to/files/to/find /path/to/search1 --ignore-extensions .TMP .LOG
"""
    )

    parser.add_argument(
        'files_to_find_dir',
        help='Directory containing files to find'
    )
    parser.add_argument(
        'search_dirs',
        nargs='+',
        help='Directories to search in (including subdirectories)'
    )
    parser.add_argument(
        '--ignore-extensions',
        nargs='+',
        default=[],
        help='File extensions to ignore (case insensitive, e.g., .tmp .log .bak)'
    )

    return parser.parse_args(argv)


def main(argv: Optional[List[str]] = None) -> None:
    """
    Main function to handle command line arguments and run the check.

    Exits with status 0 when every file name was found (or there was nothing
    to look for) and with status 1 on invalid directories or missing files.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:] when None.
    """
    args = parse_arguments(argv)

    files_to_find_dir = args.files_to_find_dir
    search_in_dirs = args.search_dirs
    ignore_extensions = _normalize_extensions(args.ignore_extensions)

    # Validate directories exist
    if not os.path.isdir(files_to_find_dir):
        print(f"Error: Directory containing files to find '{files_to_find_dir}' does not exist or is not a directory.")
        sys.exit(1)

    for search_dir in search_in_dirs:
        if not os.path.isdir(search_dir):
            print(f"Error: Directory to search in '{search_dir}' does not exist or is not a directory.")
            sys.exit(1)

    print(f"Scanning directory for files to find: {files_to_find_dir}")
    if ignore_extensions:
        print(f"Ignoring file extensions: {', '.join(sorted(ignore_extensions))}")

    source_files = get_all_files_in_directory(files_to_find_dir, ignore_extensions)
    if not source_files:
        print("No files found in source directory (after filtering by ignored extensions).")
        sys.exit(0)

    print(f"Found {len(source_files)} files in directory to search for.")
    print(f"Scanning directories to search in: {', '.join(search_in_dirs)}")

    found_files, missing_files = check_file_existence(source_files, search_in_dirs, ignore_extensions)

    print("\nResults:")
    print(f"Files found in search directory: {len(found_files)}/{len(source_files)}")
    print(f"Files missing from search directory: {len(missing_files)}")

    if found_files:
        print("\nFiles found in search directory:")
        for file in natsorted(found_files):
            print(f"  ✓ {file}")

    if missing_files:
        print("\nFiles missing from search directory:")
        for file in natsorted(missing_files):
            print(f"  ✗ {file}")

        # Create soft links for missing files
        create_soft_links_for_missing_files(files_to_find_dir, missing_files, ignore_extensions)

        print("\n❌ Not all files from the first directory exist in any of the search directories.")
        sys.exit(1)
    else:
        print("\n✅ All files from the first directory exist in at least one of the search directories.")
        sys.exit(0)


if __name__ == "__main__":
    main()