Skip to content

Instantly share code, notes, and snippets.

@kylemcdonald
Created September 5, 2025 22:57
Show Gist options
  • Save kylemcdonald/9dba4732d6c913ac1bb955dec424ae7d to your computer and use it in GitHub Desktop.
Save kylemcdonald/9dba4732d6c913ac1bb955dec424ae7d to your computer and use it in GitHub Desktop.
This script checks if all file names from the first directory exist somewhere in the second directory, including subdirectories recursively.
#!/usr/bin/env python3
"""
File Name Checker
This script checks whether every file name found in the first directory
(including its subdirectories) also exists somewhere in at least one of the
search directories, which are likewise scanned recursively.
"""
import os
import sys
import argparse
from pathlib import Path
from typing import Set, List, Tuple, Dict
from natsort import natsorted
def get_all_files_in_directory(directory: str, ignore_extensions: Set[str] = None) -> Set[str]:
    """
    Get all file names (without paths) from a directory and its subdirectories.

    Args:
        directory: Path to the directory to scan
        ignore_extensions: Set of file extensions to ignore, including the
            leading dot (e.g. {".tmp", ".log"}). Matching is case insensitive
            for both the entries given here and the extensions on disk.

    Returns:
        Set of file names (without paths). Directories that cannot be read
        are reported on stdout and skipped, so a missing or unreadable
        top-level directory yields an empty set.
    """
    # Lowercase here instead of trusting the caller, so the documented
    # case-insensitive matching holds for any input.
    ignored = {ext.lower() for ext in ignore_extensions or ()}

    def report_walk_error(error: OSError) -> None:
        # os.walk() silently swallows scan errors unless an onerror callback
        # is supplied; without this hook the messages below can never appear.
        if isinstance(error, FileNotFoundError):
            print(f"Error: Directory '{error.filename}' not found.")
        elif isinstance(error, PermissionError):
            print(f"Error: Permission denied accessing directory '{error.filename}'.")
        else:
            print(f"Error: Could not read directory '{error.filename}': {error.strerror}")

    file_names = set()
    for _root, _dirs, files in os.walk(directory, onerror=report_walk_error):
        for file in files:
            # splitext keeps the leading dot, e.g. "a.TMP" -> ".TMP"
            if os.path.splitext(file)[1].lower() not in ignored:
                file_names.add(file)
    return file_names
def get_file_paths_in_directory(directory: str, ignore_extensions: Set[str] = None) -> Dict[str, str]:
    """
    Get all file names and their full paths from a directory and its subdirectories.

    Args:
        directory: Path to the directory to scan
        ignore_extensions: Set of file extensions to ignore, including the
            leading dot (e.g. {".tmp", ".log"}). Matching is case insensitive
            for both the entries given here and the extensions on disk.

    Returns:
        Dictionary mapping file names to their paths (the walked directory
        joined with the file name). When the same file name occurs in several
        subdirectories, only the last one visited is kept. Directories that
        cannot be read are reported on stdout and skipped, so a missing or
        unreadable top-level directory yields an empty dictionary.
    """
    # Lowercase here instead of trusting the caller, so the documented
    # case-insensitive matching holds for any input.
    ignored = {ext.lower() for ext in ignore_extensions or ()}

    def report_walk_error(error: OSError) -> None:
        # os.walk() silently swallows scan errors unless an onerror callback
        # is supplied; without this hook the messages below can never appear.
        if isinstance(error, FileNotFoundError):
            print(f"Error: Directory '{error.filename}' not found.")
        elif isinstance(error, PermissionError):
            print(f"Error: Permission denied accessing directory '{error.filename}'.")
        else:
            print(f"Error: Could not read directory '{error.filename}': {error.strerror}")

    file_paths = {}
    for root, _dirs, files in os.walk(directory, onerror=report_walk_error):
        for file in files:
            # splitext keeps the leading dot, e.g. "a.TMP" -> ".TMP"
            if os.path.splitext(file)[1].lower() not in ignored:
                file_paths[file] = os.path.join(root, file)
    return file_paths
def check_file_existence(source_files: Set[str], target_directories: List[str], ignore_extensions: Set[str] = None) -> Tuple[Set[str], Set[str]]:
    """
    Split source_files into names that occur in the target directories and names that do not.

    Args:
        source_files: Set of file names to look for
        target_directories: Directories to scan (subdirectories included)
        ignore_extensions: Set of file extensions passed through to the
            directory scan so matching files are skipped

    Returns:
        Tuple of (found_files, missing_files)
    """
    # Pool the file names of every target directory into one lookup set.
    available = set().union(
        *(
            get_all_files_in_directory(folder, ignore_extensions)
            for folder in target_directories
        )
    )
    present = source_files & available
    absent = source_files.difference(available)
    return present, absent
def create_soft_links_for_missing_files(files_to_find_dir: str, missing_files: Set[str], ignore_extensions: Set[str] = None) -> None:
    """
    Create soft links for missing files in a 'missing' subdirectory.

    The 'missing' directory is created in the current working directory if it
    does not exist yet. Progress and a final summary are printed to stdout.

    Args:
        files_to_find_dir: Directory containing the original files
        missing_files: Set of missing file names
        ignore_extensions: Set of file extensions to ignore (case insensitive)
    """
    # Get file paths for the source directory
    source_file_paths = get_file_paths_in_directory(files_to_find_dir, ignore_extensions)

    # Create missing directory
    missing_dir = Path("missing")
    missing_dir.mkdir(exist_ok=True)
    print(f"\nCreating soft links in '{missing_dir}' directory...")

    created_links = 0
    failed_links = 0
    # Sorted so the output order is stable from run to run.
    for file_name in sorted(missing_files):
        source_path = source_file_paths.get(file_name)
        if source_path is None:
            print(f" ✗ Could not find source path for {file_name}")
            failed_links += 1
            continue

        link_path = missing_dir / file_name
        # A relative target is resolved relative to the link's own directory,
        # so a path relative to the working directory would dangle once it is
        # stored inside 'missing/'. Link to the absolute path instead.
        target_path = os.path.abspath(source_path)
        try:
            # exists() follows symlinks and is False for a dangling link, so
            # also check is_symlink() to clear stale links from earlier runs.
            if link_path.is_symlink() or link_path.exists():
                link_path.unlink()
            link_path.symlink_to(target_path)
            print(f" ✓ Created soft link: {file_name} -> {target_path}")
            created_links += 1
        except OSError as e:
            print(f" ✗ Failed to create soft link for {file_name}: {e}")
            failed_links += 1

    print("\nSoft link creation summary:")
    print(f" Created: {created_links}")
    print(f" Failed: {failed_links}")
def parse_arguments():
    """Build the command line parser and return the parsed arguments."""
    usage_examples = """
Examples:
python file_checker.py /path/to/files/to/find /path/to/search1 /path/to/search2
python file_checker.py /path/to/files/to/find /path/to/search1 --ignore-extensions .tmp .log .bak
python file_checker.py /path/to/files/to/find /path/to/search1 --ignore-extensions .TMP .LOG
"""
    cli = argparse.ArgumentParser(
        description="Check if all file names from the first directory exist somewhere in the search directories.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # (flags, keyword options) for each argument, registered in order.
    argument_specs = (
        (
            ('files_to_find_dir',),
            {'help': 'Directory containing files to find'},
        ),
        (
            ('search_dirs',),
            {
                'nargs': '+',
                'help': 'Directories to search in (including subdirectories)',
            },
        ),
        (
            ('--ignore-extensions',),
            {
                'nargs': '+',
                'default': [],
                'help': 'File extensions to ignore (case insensitive, e.g., .tmp .log .bak)',
            },
        ),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    return cli.parse_args()
def main():
    """
    Run the file name check from the command line.

    Exits with status 1 when a directory argument is invalid or when at least
    one file name is missing from the search directories, and with status 0
    when there is nothing to look for or every file name was found.
    """
    options = parse_arguments()
    source_dir = options.files_to_find_dir
    search_roots = options.search_dirs
    ignored = {suffix.lower() for suffix in options.ignore_extensions}

    # Reject bad directory arguments before doing any scanning.
    if not os.path.isdir(source_dir):
        print(f"Error: Directory containing files to find '{source_dir}' does not exist or is not a directory.")
        sys.exit(1)
    for candidate in search_roots:
        if not os.path.isdir(candidate):
            print(f"Error: Directory to search in '{candidate}' does not exist or is not a directory.")
            sys.exit(1)

    print(f"Scanning directory for files to find: {source_dir}")
    if ignored:
        print(f"Ignoring file extensions: {', '.join(sorted(ignored))}")

    wanted = get_all_files_in_directory(source_dir, ignored)
    if not wanted:
        print("No files found in source directory (after filtering by ignored extensions).")
        sys.exit(0)
    print(f"Found {len(wanted)} files in directory to search for.")

    print(f"Scanning directories to search in: {', '.join(search_roots)}")
    present, absent = check_file_existence(wanted, search_roots, ignored)

    print(f"\nResults:")
    print(f"Files found in search directory: {len(present)}/{len(wanted)}")
    print(f"Files missing from search directory: {len(absent)}")

    if present:
        print(f"\nFiles found in search directory:")
        for name in natsorted(present):
            print(f" ✓ {name}")

    # Success path first: nothing missing means nothing left to report.
    if not absent:
        print(f"\n✅ All files from the first directory exist in at least one of the search directories.")
        sys.exit(0)

    print(f"\nFiles missing from search directory:")
    for name in natsorted(absent):
        print(f" ✗ {name}")
    # Link each missing file so it can be collected from one place.
    create_soft_links_for_missing_files(source_dir, absent, ignored)
    print(f"\n❌ Not all files from the first directory exist in any of the search directories.")
    sys.exit(1)
# Run the check only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment