Last active
April 5, 2020 05:07
-
-
Save timotheuslin/3485e49b68fcdcb454cbb27fa26090d6 to your computer and use it in GitHub Desktop.
A customized scandir() which honors: (1) the non-traverse anchor files, (2) the excluded file pattern.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ A customized scandir() which honors: (1) the non-traverse anchor files, (2) the | |
| excluded file pattern. | |
| Timothy Lin (c) 4/4/2020 | |
| """ | |
| __all__ = ['scandir', 'NO_TRAVERSE_ANCHOR'] | |
| import os | |
| import sys | |
| import pathlib | |
| # Default set of anchor-file names. scandir() falls back to this set when its | |
| # `nontraverse` argument is None; a folder that directly contains a file with | |
| # one of these names is not traversed. | |
| NO_TRAVERSE_ANCHOR = {'.scandir.ignore'} | |
def scandir(basedir, exclude=None, nontraverse=None):
    """Yield the visible entries found under *basedir* as ``pathlib.Path`` objects.

    The walk is iterative (no recursion).  For each directory visited, all of
    its kept entries are yielded in sorted order first; its subdirectories are
    then descended into depth-first, also in sorted order.

    Args:
        basedir: the base directory to start traversing.
        exclude: an iterable of patterns; an entry is skipped when
            ``PurePath.match()`` succeeds for any of them.  ``None`` means no
            pattern is excluded.
        nontraverse: a container of anchor-file names.  ``None`` selects the
            module default ``NO_TRAVERSE_ANCHOR``.

    Yields:
        pathlib.Path: each file or folder that survived the filters.

    Notes:
        1. A file or folder with a leading '.' in its name is ignored.  This
           follows the Un*x convention.
        2. If a folder directly contains an anchor file, none of its entries
           are yielded and none of its subfolders are visited.  The folder
           itself is still yielded as an entry of its parent.
        3. NOTE(review): folders are detected with ``Path.is_dir()``, which
           follows symbolic links; nothing here guards against link cycles.
    """
    patterns = set() if exclude is None else {os.fspath(pathlib.Path(e)) for e in exclude}
    anchors = NO_TRAVERSE_ANCHOR if nontraverse is None else nontraverse

    def _wanted(entry):
        """Return True when *entry* is neither a dot-name nor excluded."""
        if entry.name.startswith('.'):
            return False
        return not any(entry.match(pattern) for pattern in patterns)

    # LIFO stack of directories still to be listed.
    pending = [pathlib.Path(basedir)]
    while pending:
        children = sorted(pending.pop().glob('*'))
        # The anchor check looks at every child, including dot-names and
        # excluded names, because the anchor itself is usually a dot-file.
        if any(child.name in anchors for child in children):
            continue
        kept = [child for child in children if _wanted(child)]
        # Push in reverse so the alphabetically first subfolder is popped next.
        pending.extend(child for child in reversed(kept) if child.is_dir())
        yield from kept
if __name__ == '__main__':
    # Command-line use: print every entry scandir() yields for the directory
    # given as the first argument.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f'usage: {sys.argv[0]} BASEDIR')
    for entry in scandir(sys.argv[1]):
        print(os.fspath(entry))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment