Skip to content

Instantly share code, notes, and snippets.

@ricrogz
Last active June 14, 2020 20:59
Show Gist options
  • Select an option

  • Save ricrogz/25a621f29f07618a3f222f88ac6b619f to your computer and use it in GitHub Desktop.

Select an option

Save ricrogz/25a621f29f07618a3f222f88ac6b619f to your computer and use it in GitHub Desktop.

Revisions

  1. ricrogz revised this gist Jun 14, 2020. 1 changed file with 33 additions and 68 deletions.
    101 changes: 33 additions & 68 deletions strip_ipynb.py
    Original file line number Diff line number Diff line change
    @@ -1,69 +1,34 @@
    #!/usr/bin/env python
    """strip outputs from an IPython Notebook
    Reads a notebook from stdin, strips its output, and writes the outputless
    version to stdout. Useful mainly as a git filter or pre-commit hook for users who
    don't want to track output in VCS. This does mostly the same thing as the
    `Clear All Output` command in the notebook UI.
    """

    #! /usr/bin/env python
    # this script filters output from ipython notebooks, for use in git repos
    # http://stackoverflow.com/questions/18734739/using-ipython-notebooks-under-version-control
    #
    # put this file in a `bin` directory in your home directory, then run the following commands:
    #
    # chmod a+x ~/bin/ipynb_output_filter.py
    # echo -e "*.ipynb \t filter=dropoutput_ipynb" >> ~/.gitattributes
    # git config --global core.attributesfile ~/.gitattributes
    # git config --global filter.dropoutput_ipynb.clean ~/bin/ipynb_output_filter.py
    # git config --global filter.dropoutput_ipynb.smudge cat

    # works with Notebook versions 3, 4 and 5 (IPython/Jupyter versions 2, 3 and 4)
    import sys
    from io import StringIO
    import codecs

    # Interpreter flag consulted by the script entry point: False whenever the
    # running interpreter's version compares greater than (3, 0).
    py2 = not (sys.version_info > (3, 0))

    # Bind ``read``, ``write`` and ``NO_CONVERT`` from whichever notebook-format
    # package can be imported, trying the newest location first.
    try:
        # Jupyter >= 4
        from nbformat import read, write, NO_CONVERT
    except ImportError:
        # IPython 3
        try:
            from IPython.nbformat import read, write, NO_CONVERT
        except ImportError:
            # IPython < 3
            from IPython.nbformat import current

            # Nothing named NO_CONVERT is imported on this path, but the script
            # entry point passes ``as_version=NO_CONVERT``. Bind a placeholder
            # so that call does not fail with NameError; the ``read`` shim
            # below ignores the value.
            NO_CONVERT = None

            def read(f, as_version):
                """Read a notebook from file object *f* as JSON.

                *as_version* is accepted only for signature compatibility with
                the newer ``read`` and is not used.
                """
                return current.read(f, 'json')

            def write(nb, f):
                """Write notebook *nb* to file object *f* as JSON."""
                return current.write(nb, f, 'json')


    def _cells(nb):
        """Generate every cell of *nb*, whatever its notebook format version.

        When ``nb.nbformat`` is below 4 the cells are taken from each entry of
        ``nb.worksheets``; otherwise they are taken from ``nb.cells``.
        """
        # Treat a format >= 4 notebook as a single sheet so one loop serves both.
        sheets = nb.worksheets if nb.nbformat < 4 else [nb]
        for sheet in sheets:
            for cell in sheet.cells:
                yield cell


    def strip_output(nb):
        """Clear execution results from the notebook *nb* in place.

        Removes the ``signature`` entry from the notebook metadata, then for
        each cell empties ``outputs``, resets ``prompt_number`` to 0 and
        ``execution_count`` to None, and blanks any ``ExecuteTime`` entry in
        the cell metadata. Keys a cell does not already have are not added.
        Returns the same *nb* object.
        """
        nb.metadata.pop('signature', None)
        for cell in _cells(nb):
            # The tuple is rebuilt for every cell, so each gets its own list.
            for key, blank in (('outputs', []),
                               ('prompt_number', 0),
                               ('execution_count', None)):
                if key in cell:
                    cell[key] = blank
            if 'metadata' in cell:
                cell_meta = cell['metadata']
                if 'ExecuteTime' in cell_meta:
                    cell_meta['ExecuteTime'] = {}
        return nb


    if __name__ == '__main__':
        # Filter mode: notebook JSON arrives on stdin, the stripped notebook
        # leaves on stdout.
        if py2:
            # Under the py2 flag, wrap stdin in a UTF-8 stream reader so that
            # read() yields decoded text for StringIO.
            sys.stdin = codecs.getreader('utf8')(sys.stdin)
        raw_text = sys.stdin.read()
        notebook = read(StringIO(raw_text), as_version=NO_CONVERT)
        write(strip_output(notebook), sys.stdout)
    from nbformat import read, write, NO_CONVERT

    # Read the notebook from stdin without converting its format version.
    json_in = read(sys.stdin, NO_CONVERT)

    # Detect earlier versions: notebooks prior to format 4 keep their cells
    # inside a 'worksheets' list. Walk every worksheet rather than only the
    # first, so no sheet keeps its outputs and an empty list is harmless.
    if 'worksheets' in json_in:
        sheets = json_in.worksheets
    else:
        sheets = [json_in]

    for sheet in sheets:
        for cell in sheet.cells:
            # Only reset fields the cell already carries; never add new keys.
            if "outputs" in cell:
                cell.outputs = []
            if "prompt_number" in cell:
                cell.prompt_number = None
            if "execution_count" in cell:
                cell.execution_count = None

    # Emit the stripped notebook on stdout, again without version conversion.
    write(json_in, sys.stdout, NO_CONVERT)
  2. ricrogz revised this gist Jun 14, 2020. 1 changed file with 0 additions and 12 deletions.
    12 changes: 0 additions & 12 deletions README
    Original file line number Diff line number Diff line change
    @@ -1,12 +0,0 @@
    This automatically strips the output cells of Jupyter notebooks when adding files to a git repository.
    This is useful to reduce garbage checked into the repo.

    Copy the Python script and the .gitattributes files to the repo, and set the config. Make the script executable.

    The filter can also be configured with `git config filter.strip_ipynb.clean "python3 strip_ipynb.py"`.

    Based on the following:
    https://gist.github.com/minrk/6176788
    https://gist.github.com/waylonflinn/010f0a1a66760adf914f
    https://github.com/cfriedline/ipynb_template/blob/master/nbstripout
    https://github.com/kynan/nbstripout
  3. ricrogz created this gist Jun 14, 2020.
    2 changes: 2 additions & 0 deletions .git_config
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,2 @@
    [filter "strip_ipynb"]
    clean = python3 strip_ipynb.py
    1 change: 1 addition & 0 deletions .gitattributes
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    *.ipynb filter=strip_ipynb
    12 changes: 12 additions & 0 deletions README
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,12 @@
    This automatically strips the output cells of Jupyter notebooks when adding files to a git repository.
    This is useful to reduce garbage checked into the repo.

    Copy the Python script and the .gitattributes files to the repo, and set the config. Make the script executable.

    The filter can also be configured with `git config filter.strip_ipynb.clean "python3 strip_ipynb.py"`.

    Based on the following:
    https://gist.github.com/minrk/6176788
    https://gist.github.com/waylonflinn/010f0a1a66760adf914f
    https://github.com/cfriedline/ipynb_template/blob/master/nbstripout
    https://github.com/kynan/nbstripout
    69 changes: 69 additions & 0 deletions strip_ipynb.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,69 @@
    #!/usr/bin/env python
    """strip outputs from an IPython Notebook
    Reads a notebook from stdin, strips its output, and writes the outputless
    version to stdout. Useful mainly as a git filter or pre-commit hook for users who
    don't want to track output in VCS. This does mostly the same thing as the
    `Clear All Output` command in the notebook UI.
    """

    import sys
    from io import StringIO
    import codecs

    # Interpreter flag consulted by the script entry point: False whenever the
    # running interpreter's version compares greater than (3, 0).
    py2 = not (sys.version_info > (3, 0))

    # Bind ``read``, ``write`` and ``NO_CONVERT`` from whichever notebook-format
    # package can be imported, trying the newest location first.
    try:
        # Jupyter >= 4
        from nbformat import read, write, NO_CONVERT
    except ImportError:
        # IPython 3
        try:
            from IPython.nbformat import read, write, NO_CONVERT
        except ImportError:
            # IPython < 3
            from IPython.nbformat import current

            # Nothing named NO_CONVERT is imported on this path, but the script
            # entry point passes ``as_version=NO_CONVERT``. Bind a placeholder
            # so that call does not fail with NameError; the ``read`` shim
            # below ignores the value.
            NO_CONVERT = None

            def read(f, as_version):
                """Read a notebook from file object *f* as JSON.

                *as_version* is accepted only for signature compatibility with
                the newer ``read`` and is not used.
                """
                return current.read(f, 'json')

            def write(nb, f):
                """Write notebook *nb* to file object *f* as JSON."""
                return current.write(nb, f, 'json')


    def _cells(nb):
        """Generate every cell of *nb*, whatever its notebook format version.

        When ``nb.nbformat`` is below 4 the cells are taken from each entry of
        ``nb.worksheets``; otherwise they are taken from ``nb.cells``.
        """
        # Treat a format >= 4 notebook as a single sheet so one loop serves both.
        sheets = nb.worksheets if nb.nbformat < 4 else [nb]
        for sheet in sheets:
            for cell in sheet.cells:
                yield cell


    def strip_output(nb):
        """Clear execution results from the notebook *nb* in place.

        Removes the ``signature`` entry from the notebook metadata, then for
        each cell empties ``outputs``, resets ``prompt_number`` to 0 and
        ``execution_count`` to None, and blanks any ``ExecuteTime`` entry in
        the cell metadata. Keys a cell does not already have are not added.
        Returns the same *nb* object.
        """
        nb.metadata.pop('signature', None)
        for cell in _cells(nb):
            # The tuple is rebuilt for every cell, so each gets its own list.
            for key, blank in (('outputs', []),
                               ('prompt_number', 0),
                               ('execution_count', None)):
                if key in cell:
                    cell[key] = blank
            if 'metadata' in cell:
                cell_meta = cell['metadata']
                if 'ExecuteTime' in cell_meta:
                    cell_meta['ExecuteTime'] = {}
        return nb


    if __name__ == '__main__':
        # Filter mode: notebook JSON arrives on stdin, the stripped notebook
        # leaves on stdout.
        if py2:
            # Under the py2 flag, wrap stdin in a UTF-8 stream reader so that
            # read() yields decoded text for StringIO.
            sys.stdin = codecs.getreader('utf8')(sys.stdin)
        raw_text = sys.stdin.read()
        notebook = read(StringIO(raw_text), as_version=NO_CONVERT)
        write(strip_output(notebook), sys.stdout)