Created
September 5, 2025 22:57
-
-
Save kylemcdonald/9dba4732d6c913ac1bb955dec424ae7d to your computer and use it in GitHub Desktop.
This script checks if all file names from the first directory exist somewhere in the second directory, including subdirectories recursively.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| File Name Checker | |
| This script checks if all file names from the first directory exist somewhere | |
| in one or more search directories, including subdirectories recursively. | |
| Files that are missing are listed and soft-linked into a 'missing' directory. | |
| """ | |
| import os | |
| import sys | |
| import argparse | |
| from pathlib import Path | |
| from typing import Set, List, Tuple, Dict | |
| from natsort import natsorted | |
def get_all_files_in_directory(directory: str, ignore_extensions: Set[str] = None) -> Set[str]:
    """
    Get all file names (without paths) from a directory and its subdirectories.

    Args:
        directory: Path to the directory to scan
        ignore_extensions: Set of file extensions to ignore (case insensitive,
            including the leading dot, e.g. ".tmp"). None means ignore nothing.

    Returns:
        Set of file names (without paths). The set is empty when the directory
        itself cannot be read; unreadable subdirectories are reported and
        skipped.
    """
    # Normalize here so the documented case insensitivity does not depend on
    # the caller having lower-cased the extensions beforehand.
    ignored = {ext.lower() for ext in ignore_extensions or ()}

    def report_walk_error(error: OSError) -> None:
        # os.walk() swallows directory-listing errors unless it is given an
        # onerror callback, so a try/except around the loop never fires.
        location = error.filename or directory
        if isinstance(error, FileNotFoundError):
            print(f"Error: Directory '{location}' not found.")
        elif isinstance(error, PermissionError):
            print(f"Error: Permission denied accessing directory '{location}'.")

    file_names = set()
    for _root, _dirs, files in os.walk(directory, onerror=report_walk_error):
        file_names.update(
            name for name in files
            if os.path.splitext(name)[1].lower() not in ignored
        )
    return file_names
def get_file_paths_in_directory(directory: str, ignore_extensions: Set[str] = None) -> Dict[str, str]:
    """
    Get all file names and their full paths from a directory and its subdirectories.

    Args:
        directory: Path to the directory to scan
        ignore_extensions: Set of file extensions to ignore (case insensitive,
            including the leading dot, e.g. ".tmp"). None means ignore nothing.

    Returns:
        Dictionary mapping file names to their full paths. When the same file
        name occurs in several subdirectories, the path visited last wins.
        The dictionary is empty when the directory itself cannot be read;
        unreadable subdirectories are reported and skipped.
    """
    # Normalize here so the documented case insensitivity does not depend on
    # the caller having lower-cased the extensions beforehand.
    ignored = {ext.lower() for ext in ignore_extensions or ()}

    def report_walk_error(error: OSError) -> None:
        # os.walk() swallows directory-listing errors unless it is given an
        # onerror callback, so a try/except around the loop never fires.
        location = error.filename or directory
        if isinstance(error, FileNotFoundError):
            print(f"Error: Directory '{location}' not found.")
        elif isinstance(error, PermissionError):
            print(f"Error: Permission denied accessing directory '{location}'.")

    file_paths = {}
    for root, _dirs, files in os.walk(directory, onerror=report_walk_error):
        for name in files:
            if os.path.splitext(name)[1].lower() not in ignored:
                file_paths[name] = os.path.join(root, name)
    return file_paths
def check_file_existence(source_files: Set[str], target_directories: List[str], ignore_extensions: Set[str] = None) -> Tuple[Set[str], Set[str]]:
    """
    Split source_files into names present in, and absent from, the target trees.

    Args:
        source_files: Set of file names to look for
        target_directories: Directories to scan (each one recursively)
        ignore_extensions: Set of file extensions to skip while scanning the
            target directories

    Returns:
        Tuple of (found_files, missing_files)
    """
    # Pool the file names of every target tree into one set.
    per_directory = (
        get_all_files_in_directory(candidate, ignore_extensions)
        for candidate in target_directories
    )
    pooled_names = set().union(*per_directory)

    present = source_files & pooled_names
    absent = source_files.difference(pooled_names)
    return present, absent
def create_soft_links_for_missing_files(files_to_find_dir: str, missing_files: Set[str], ignore_extensions: Set[str] = None) -> None:
    """
    Create soft links for missing files in a 'missing' subdirectory.

    The 'missing' directory is created in the current working directory if it
    does not exist yet. Each link points at the absolute path of the original
    file, so it stays valid even when files_to_find_dir was given as a
    relative path.

    Args:
        files_to_find_dir: Directory containing the original files
        missing_files: Set of missing file names
        ignore_extensions: Set of file extensions to ignore (case insensitive)
    """
    # Get file paths for the source directory
    source_file_paths = get_file_paths_in_directory(files_to_find_dir, ignore_extensions)

    # Create missing directory
    missing_dir = Path("missing")
    missing_dir.mkdir(exist_ok=True)
    print(f"\nCreating soft links in '{missing_dir}' directory...")

    created_links = 0
    failed_links = 0
    # Sorted so that the report is deterministic from run to run.
    for file_name in sorted(missing_files):
        source_path = source_file_paths.get(file_name)
        if source_path is None:
            print(f" ✗ Could not find source path for {file_name}")
            failed_links += 1
            continue

        link_path = missing_dir / file_name
        # A relative target would be resolved relative to the 'missing'
        # directory and end up dangling, so always link to an absolute path.
        link_target = os.path.abspath(source_path)
        try:
            # exists() follows symlinks and is False for a dangling link, so
            # is_symlink() is checked as well to replace stale links.
            if link_path.is_symlink() or link_path.exists():
                link_path.unlink()
            # Create soft link
            link_path.symlink_to(link_target)
            print(f" ✓ Created soft link: {file_name} -> {link_target}")
            created_links += 1
        except OSError as e:
            print(f" ✗ Failed to create soft link for {file_name}: {e}")
            failed_links += 1

    print("\nSoft link creation summary:")
    print(f" Created: {created_links}")
    print(f" Failed: {failed_links}")
def parse_arguments():
    """Build the command line parser and return the parsed arguments.

    Positional arguments are the directory holding the files to look for,
    followed by one or more directories to search. The optional
    --ignore-extensions flag takes one or more extensions and defaults to an
    empty list.
    """
    usage_examples = """
Examples:
python file_checker.py /path/to/files/to/find /path/to/search1 /path/to/search2
python file_checker.py /path/to/files/to/find /path/to/search1 --ignore-extensions .tmp .log .bak
python file_checker.py /path/to/files/to/find /path/to/search1 --ignore-extensions .TMP .LOG
"""
    cli = argparse.ArgumentParser(
        description="Check if all file names from the first directory exist somewhere in the search directories.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # (flag, keyword options) pairs, registered in declaration order.
    argument_specs = (
        ('files_to_find_dir', {
            'help': 'Directory containing files to find',
        }),
        ('search_dirs', {
            'nargs': '+',
            'help': 'Directories to search in (including subdirectories)',
        }),
        ('--ignore-extensions', {
            'nargs': '+',
            'default': [],
            'help': 'File extensions to ignore (case insensitive, e.g., .tmp .log .bak)',
        }),
    )
    for flag, options in argument_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args()
def main():
    """Run the check from the command line and exit with a status code.

    Exits with 1 when a given directory is invalid or when at least one file
    name is missing from the search directories, and with 0 when every file
    name was found or there was nothing to look for.
    """
    cli_args = parse_arguments()
    source_dir = cli_args.files_to_find_dir
    target_dirs = cli_args.search_dirs
    skipped_exts = set(map(str.lower, cli_args.ignore_extensions))

    # Refuse to continue unless every given path is an existing directory.
    if not os.path.isdir(source_dir):
        print(f"Error: Directory containing files to find '{source_dir}' does not exist or is not a directory.")
        sys.exit(1)
    for candidate in target_dirs:
        if os.path.isdir(candidate):
            continue
        print(f"Error: Directory to search in '{candidate}' does not exist or is not a directory.")
        sys.exit(1)

    print(f"Scanning directory for files to find: {source_dir}")
    if skipped_exts:
        ext_listing = ', '.join(sorted(skipped_exts))
        print(f"Ignoring file extensions: {ext_listing}")

    wanted = get_all_files_in_directory(source_dir, skipped_exts)
    if not wanted:
        print("No files found in source directory (after filtering by ignored extensions).")
        sys.exit(0)
    print(f"Found {len(wanted)} files in directory to search for.")

    dir_listing = ', '.join(target_dirs)
    print(f"Scanning directories to search in: {dir_listing}")
    present, absent = check_file_existence(wanted, target_dirs, skipped_exts)

    print("\nResults:")
    print(f"Files found in search directory: {len(present)}/{len(wanted)}")
    print(f"Files missing from search directory: {len(absent)}")

    if present:
        print("\nFiles found in search directory:")
        for name in natsorted(present):
            print(f" ✓ {name}")

    if not absent:
        print("\n✅ All files from the first directory exist in at least one of the search directories.")
        sys.exit(0)

    print("\nFiles missing from search directory:")
    for name in natsorted(absent):
        print(f" ✗ {name}")
    # Soft-link every missing file so it can be collected from one place.
    create_soft_links_for_missing_files(source_dir, absent, skipped_exts)
    print("\n❌ Not all files from the first directory exist in any of the search directories.")
    sys.exit(1)
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment