Skip to content

Instantly share code, notes, and snippets.

@adamnew123456
Last active February 1, 2025 03:18
Show Gist options
  • Select an option

  • Save adamnew123456/37923cf53f51d6b9af32a539cdfa7cc4 to your computer and use it in GitHub Desktop.

Select an option

Save adamnew123456/37923cf53f51d6b9af32a539cdfa7cc4 to your computer and use it in GitHub Desktop.

Revisions

  1. adamnew123456 revised this gist Nov 6, 2020. 1 changed file with 25 additions and 0 deletions.
    25 changes: 25 additions & 0 deletions diff.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,28 @@
    # This is free and unencumbered software released into the public domain.
    #
    # Anyone is free to copy, modify, publish, use, compile, sell, or
    # distribute this software, either in source code form or as a compiled
    # binary, for any purpose, commercial or non-commercial, and by any
    # means.
    #
    # In jurisdictions that recognize copyright laws, the author or authors
    # of this software dedicate any and all copyright interest in the
    # software to the public domain. We make this dedication for the benefit
    # of the public at large and to the detriment of our heirs and
    # successors. We intend this dedication to be an overt act of
    # relinquishment in perpetuity of all present and future rights to this
    # software under copyright law.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
    # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
    # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    # OTHER DEALINGS IN THE SOFTWARE.
    #
    # For more information, please refer to <http://unlicense.org/>

    from __future__ import print_function # Py2 compat
    from collections import namedtuple
    import sys
  2. adamnew123456 created this gist Jul 27, 2016.
    111 changes: 111 additions & 0 deletions diff.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,111 @@
    from __future__ import print_function # Py2 compat
    from collections import namedtuple
    import sys

    # These define the structure of the history, and correspond to diff output with
    # lines that start with a space, a + and a - respectively.
    Keep = namedtuple('Keep', ['line'])
    Insert = namedtuple('Insert', ['line'])
    Remove = namedtuple('Remove', ['line'])

    # See frontier in myers_diff
    Frontier = namedtuple('Frontier', ['x', 'history'])

    def myers_diff(a_lines, b_lines):
    """
    An implementation of the Myers diff algorithm.
    See http://www.xmailserver.org/diff2.pdf
    """
    # This marks the farthest-right point along each diagonal in the edit
    # graph, along with the history that got it there
    frontier = {1: Frontier(0, [])}

    def one(idx):
    """
    The algorithm Myers presents is 1-indexed; since Python isn't, we
    need a conversion.
    """
    return idx - 1

    a_max = len(a_lines)
    b_max = len(b_lines)
    for d in range(0, a_max + b_max + 1):
    for k in range(-d, d + 1, 2):
    # This determines whether our next search point will be going down
    # in the edit graph, or to the right.
    #
    # The intuition for this is that we should go down if we're on the
    # left edge (k == -d) to make sure that the left edge is fully
    # explored.
    #
    # If we aren't on the top (k != d), then only go down if going down
    # would take us to territory that hasn't sufficiently been explored
    # yet.
    go_down = (k == -d or
    (k != d and frontier[k - 1].x < frontier[k + 1].x))

    # Figure out the starting point of this iteration. The diagonal
    # offsets come from the geometry of the edit grid - if you're going
    # down, your diagonal is lower, and if you're going right, your
    # diagonal is higher.
    if go_down:
    old_x, history = frontier[k + 1]
    x = old_x
    else:
    old_x, history = frontier[k - 1]
    x = old_x + 1

    # We want to avoid modifying the old history, since some other step
    # may decide to use it.
    history = history[:]
    y = x - k

    # We start at the invalid point (0, 0) - we should only start building
    # up history when we move off of it.
    if 1 <= y <= b_max and go_down:
    history.append(Insert(b_lines[one(y)]))
    elif 1 <= x <= a_max:
    history.append(Remove(a_lines[one(x)]))

    # Chew up as many diagonal moves as we can - these correspond to common lines,
    # and they're considered "free" by the algorithm because we want to maximize
    # the number of these in the output.
    while x < a_max and y < b_max and a_lines[one(x + 1)] == b_lines[one(y + 1)]:
    x += 1
    y += 1
    history.append(Keep(a_lines[one(x)]))

    if x >= a_max and y >= b_max:
    # If we're here, then we've traversed through the bottom-left corner,
    # and are done.
    return history
    else:
    frontier[k] = Frontier(x, history)

    assert False, 'Could not find edit script'

    def main():
    try:
    _, a_file, b_file = sys.argv
    except ValueError:
    print(sys.argv[0], '<FILE>', '<FILE>')
    return 1

    with open(a_file) as a_handle:
    a_lines = [line.rstrip() for line in a_handle]

    with open(b_file) as b_handle:
    b_lines = [line.rstrip() for line in b_handle]

    diff = myers_diff(a_lines, b_lines)
    for elem in diff:
    if isinstance(elem, Keep):
    print(' ' + elem.line)
    elif isinstance(elem, Insert):
    print('+' + elem.line)
    else:
    print('-' + elem.line)

    if __name__ == '__main__':
    sys.exit(main())