Skip to content

Instantly share code, notes, and snippets.

@timotheuslin
Last active April 5, 2020 05:07
Show Gist options
  • Select an option

  • Save timotheuslin/3485e49b68fcdcb454cbb27fa26090d6 to your computer and use it in GitHub Desktop.

Select an option

Save timotheuslin/3485e49b68fcdcb454cbb27fa26090d6 to your computer and use it in GitHub Desktop.

Revisions

  1. timotheuslin revised this gist Apr 5, 2020. 1 changed file with 5 additions and 5 deletions.
    10 changes: 5 additions & 5 deletions scandir.py
    Original file line number Diff line number Diff line change
    @@ -44,18 +44,18 @@ def _nt(fx):

    def _scandir():
    dirX = [pathlib.Path(basedir)]
    fileX = []
    while dirX or fileX:
    if fileX:
    yield fileX.pop()
    entX = []
    while dirX or entX:
    if entX:
    yield entX.pop()
    continue
    try:
    fx0 = [f for f in sorted(dirX.pop().glob('*'), reverse=True)
    if not _nt(f) and not _dot(f) and not _match(f)]
    except StopIteration:
    continue
    dirX += [d for d in fx0 if d.is_dir()]
    fileX += [f for f in fx0 if f.is_file()]
    entX += fx0

    for f in _scandir():
    yield f
  2. timotheuslin created this gist Apr 5, 2020.
    65 changes: 65 additions & 0 deletions scandir.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,65 @@
    """ A customized scandir() which honors: (1)the non-traverse anchor files, (2) the
    excluded file patterns.
    Timothy Lin (c) 4/4/2020
    """

    # Names exported by a star-import of this module.
    __all__ = ['scandir', 'NO_TRAVERSE_ANCHOR']

    import os
    import sys
    import pathlib

    # Default anchor-file names, used by scandir() when `nontraverse` is None:
    # a folder that directly contains an entry with one of these names is
    # ignored altogether (neither its files nor its sub-folders are visited).
    NO_TRAVERSE_ANCHOR = {'.scandir.ignore'}

    def scandir(basedir, exclude=None, nontraverse=None):
        """Yield the files found under *basedir*, honoring exclusions and anchor files.

        The walk is iterative (no recursion) and depth-first: the files of a
        folder are yielded in ascending name order, then its sub-folders are
        visited in ascending name order, each one fully before the next.
        Folders are traversed but never yielded themselves.

        basedir     - the base directory to start traversing. It is always
                      scanned; the dot/exclude rules apply only to what is
                      found beneath it.
        exclude     - an iterable of glob-style patterns; an entry for which
                      pathlib's ``match()`` succeeds on any pattern is ignored
                      (a matching folder is not traversed). None means no
                      exclusion.
        nontraverse - a set of anchor-file names; a folder that directly
                      contains an entry with one of these names is ignored
                      altogether. None means NO_TRAVERSE_ANCHOR.

        scandir() yields pathlib.Path objects.

        Notes:
        1. A file or folder with a leading '.' in its name is ignored. This
           follows the Un*x convention.
        2. If a folder contains an anchor file, all its subordinate directories
           and files are ignored.
        3. NOTE(review): Path.is_dir() follows symbolic links, so a link cycle
           among folders would keep this generator running -- confirm whether
           callers need protection against that.
        """
        if exclude is None:
            patterns = set()
        else:
            # Normalize each pattern through pathlib so separators are native.
            patterns = {os.fspath(pathlib.Path(e)) for e in exclude}
        anchors = NO_TRAVERSE_ANCHOR if nontraverse is None else nontraverse

        def _ignored(entry):
            # Hidden (dot-prefixed) or matching one of the exclude patterns.
            return entry.name.startswith('.') or any(entry.match(p) for p in patterns)

        # Stack of folders still to visit; the last pushed is visited first.
        pending = [pathlib.Path(basedir)]
        while pending:
            # Reverse-sorted so that pushing sub-folders onto the stack makes
            # the smallest name pop first.
            entries = sorted(pending.pop().glob('*'), reverse=True)
            if any(e.name in anchors for e in entries):
                # Anchor present: drop this folder and everything below it.
                continue
            kept = [e for e in entries if not _ignored(e)]
            pending += [e for e in kept if e.is_dir()]
            files = [e for e in kept if e.is_file()]
            # `files` is in descending order; emit ascending.
            yield from reversed(files)

    if __name__ == '__main__':
        # Command-line entry point: print every path scandir() yields for the
        # directory given as the first argument.
        if len(sys.argv) < 2:
            # Exit with a usage message instead of an IndexError traceback.
            sys.exit(f'usage: {sys.argv[0]} BASEDIR')
        for n in scandir(sys.argv[1]):
            print(os.fspath(n))