Skip to content

Instantly share code, notes, and snippets.

@timotheuslin
Last active April 5, 2020 05:07
Show Gist options
  • Select an option

  • Save timotheuslin/3485e49b68fcdcb454cbb27fa26090d6 to your computer and use it in GitHub Desktop.

Select an option

Save timotheuslin/3485e49b68fcdcb454cbb27fa26090d6 to your computer and use it in GitHub Desktop.
A customized scandir() which honors: (1) the non-traverse anchor files, and (2) the excluded file patterns.
""" A customized scandir() which honors: (1)the non-traverse anchor files, (2) the
excluded file pattern.
Timothy Lin (c) 4/4/2020
"""
# Public names exported by `from <module> import *`.
__all__ = ['scandir', 'NO_TRAVERSE_ANCHOR']
import os
import sys
import pathlib
# Default anchor-file names used by scandir() when its `nontraverse` argument is
# None: a folder that directly contains an entry with one of these names is
# ignored altogether (none of its files or sub-folders are reported).
NO_TRAVERSE_ANCHOR = {'.scandir.ignore'}
def scandir(basedir, exclude=None, nontraverse=None):
    """Yield the files and folders found under *basedir* as pathlib.Path objects.

    The walk is iterative (no recursion). All surviving entries of one folder
    are yielded in ascending name order before the next pending folder is
    opened; pending folders are kept on a stack, so the first sub-folder of the
    folder listed most recently is visited next.

    basedir     - the base directory to start traversing. It is never yielded
                  itself.
    exclude     - an iterable of patterns; an entry is dropped when its
                  pathlib ``match()`` accepts any of them. None means no
                  pattern.
    nontraverse - a collection of anchor-file names to indicate folders to be
                  ignored when they contain any one of these entries. None
                  selects NO_TRAVERSE_ANCHOR.

    Notes:
    1. A file or folder with a leading '.' in its name is ignored. This follows
       the Un*x convention.
    2. If a folder contains an anchor file, all its subordinate directories and
       files are ignored. The folder itself is still reported by the listing
       of its parent.
    3. A dropped folder (dot-named or excluded) is not descended into.
    4. Folders are detected with ``Path.is_dir()``; no protection against
       cyclic directory links is implemented here.
    """
    # Normalise the patterns through pathlib so they use the native separators.
    patterns = set() if exclude is None else {
        os.fspath(pathlib.Path(pattern)) for pattern in exclude}
    anchors = NO_TRAVERSE_ANCHOR if nontraverse is None else nontraverse

    def _keep(entry):
        """Return True when *entry* is neither dot-named nor excluded."""
        if entry.name.startswith('.'):
            return False
        return not any(entry.match(pattern) for pattern in patterns)

    pending = [pathlib.Path(basedir)]
    while pending:
        children = sorted(pending.pop().glob('*'))
        # The anchor test runs on the raw listing, before dot-named entries are
        # dropped, so that a dot-named anchor such as '.scandir.ignore' is seen.
        if any(child.name in anchors for child in children):
            continue
        kept = [child for child in children if _keep(child)]
        # Queue sub-folders in reverse so the stack pops them in ascending
        # name order; this is done before yielding, while the listing is fresh.
        pending.extend(child for child in reversed(kept) if child.is_dir())
        yield from kept
if __name__ == '__main__':
    # Command-line use: print every entry scandir() reports under the folder
    # named by the first argument, one path per line.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f'usage: {sys.argv[0]} BASEDIR')
    for found in scandir(sys.argv[1]):
        print(os.fspath(found))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment